From ce08521490fcdefd2bc81d241a5c445137deee1c Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Thu, 16 May 2019 14:39:10 -0700 Subject: [PATCH 01/16] Move WAL Watcher from Prometheus to TSDB WAL package. Signed-off-by: Callum Styan --- block.go | 25 +- cmd/tsdb/main.go | 3 +- compact.go | 19 +- compact_test.go | 1 + db_test.go | 45 +- go.mod | 1 + go.sum | 6 +- head.go | 387 +++--------- head_test.go | 173 +++--- mocks_test.go | 1 + querier.go | 33 +- querier_test.go | 58 +- record/internal.go | 371 ++++++++++++ record.go => record/record.go | 10 +- record_test.go => record/record_test.go | 16 +- tombstones.go => record/tombstones.go | 60 +- .../tombstones_test.go | 18 +- wal.go | 112 ++-- checkpoint.go => wal/checkpoint.go | 40 +- checkpoint_test.go => wal/checkpoint_test.go | 36 +- wal/reader_test.go | 22 +- wal/wal_watcher.go | 556 ++++++++++++++++++ wal/wal_watcher_test.go | 509 ++++++++++++++++ wal_test.go | 103 ++-- 24 files changed, 1925 insertions(+), 680 deletions(-) create mode 100644 record/internal.go rename record.go => record/record.go (97%) rename record_test.go => record/record_test.go (89%) rename tombstones.go => record/tombstones.go (80%) rename tombstones_test.go => record/tombstones_test.go (89%) rename checkpoint.go => wal/checkpoint.go (88%) rename checkpoint_test.go => wal/checkpoint_test.go (89%) create mode 100644 wal/wal_watcher.go create mode 100644 wal/wal_watcher_test.go diff --git a/block.go b/block.go index d0fe2b2f..516bd00b 100644 --- a/block.go +++ b/block.go @@ -32,6 +32,7 @@ import ( "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" ) // IndexWriter serializes the index for a block of series data. @@ -136,7 +137,7 @@ type BlockReader interface { Chunks() (ChunkReader, error) // Tombstones returns a TombstoneReader over the block's deleted data. 
- Tombstones() (TombstoneReader, error) + Tombstones() (record.TombstoneReader, error) // Meta provides meta information about the block reader. Meta() BlockMeta @@ -278,7 +279,7 @@ type Block struct { chunkr ChunkReader indexr IndexReader - tombstones TombstoneReader + tombstones record.TombstoneReader logger log.Logger @@ -320,7 +321,7 @@ func OpenBlock(logger log.Logger, dir string, pool chunkenc.Pool) (pb *Block, er } closers = append(closers, ir) - tr, sizeTomb, err := readTombstones(dir) + tr, sizeTomb, err := record.ReadTombstones(dir) if err != nil { return nil, err } @@ -411,7 +412,7 @@ func (pb *Block) Chunks() (ChunkReader, error) { } // Tombstones returns a new TombstoneReader against the block data. -func (pb *Block) Tombstones() (TombstoneReader, error) { +func (pb *Block) Tombstones() (record.TombstoneReader, error) { if err := pb.startRead(); err != nil { return nil, err } @@ -482,7 +483,7 @@ func (r blockIndexReader) Close() error { } type blockTombstoneReader struct { - TombstoneReader + record.TombstoneReader b *Block } @@ -518,7 +519,7 @@ func (pb *Block) Delete(mint, maxt int64, ms ...labels.Matcher) error { ir := pb.indexr // Choose only valid postings which have chunks in the time-range. - stones := newMemTombstones() + stones := record.NewMemTombstones() var lset labels.Labels var chks []chunks.Meta @@ -534,7 +535,7 @@ Outer: if chk.OverlapsClosedInterval(mint, maxt) { // Delete only until the current values and not beyond. 
tmin, tmax := clampInterval(mint, maxt, chks[0].MinTime, chks[len(chks)-1].MaxTime) - stones.addInterval(p.At(), Interval{tmin, tmax}) + stones.AddInterval(p.At(), record.Interval{tmin, tmax}) continue Outer } } @@ -544,9 +545,9 @@ Outer: return p.Err() } - err = pb.tombstones.Iter(func(id uint64, ivs Intervals) error { + err = pb.tombstones.Iter(func(id uint64, ivs record.Intervals) error { for _, iv := range ivs { - stones.addInterval(id, iv) + stones.AddInterval(id, iv) } return nil }) @@ -556,7 +557,7 @@ Outer: pb.tombstones = stones pb.meta.Stats.NumTombstones = pb.tombstones.Total() - n, err := writeTombstoneFile(pb.logger, pb.dir, pb.tombstones) + n, err := record.WriteTombstoneFile(pb.logger, pb.dir, pb.tombstones) if err != nil { return err } @@ -574,7 +575,7 @@ Outer: func (pb *Block) CleanTombstones(dest string, c Compactor) (*ulid.ULID, error) { numStones := 0 - if err := pb.tombstones.Iter(func(id uint64, ivs Intervals) error { + if err := pb.tombstones.Iter(func(id uint64, ivs record.Intervals) error { numStones += len(ivs) return nil }); err != nil { @@ -609,7 +610,7 @@ func (pb *Block) Snapshot(dir string) error { for _, fname := range []string{ metaFilename, indexFilename, - tombstoneFilename, + record.TombstoneFilename, } { if err := os.Link(filepath.Join(pb.dir, fname), filepath.Join(blockDir, fname)); err != nil { return errors.Wrapf(err, "create snapshot %s", fname) diff --git a/cmd/tsdb/main.go b/cmd/tsdb/main.go index e3dc530a..829891ef 100644 --- a/cmd/tsdb/main.go +++ b/cmd/tsdb/main.go @@ -36,6 +36,7 @@ import ( "github.com/prometheus/tsdb/chunks" tsdb_errors "github.com/prometheus/tsdb/errors" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "gopkg.in/alecthomas/kingpin.v2" ) @@ -306,7 +307,7 @@ func (b *writeBenchmark) ingestScrapesShard(lbls []labels.Labels, scrapeCount in s.ref = &ref } else if err := app.AddFast(*s.ref, ts, float64(s.value)); err != nil { - if errors.Cause(err) != tsdb.ErrNotFound { + if 
errors.Cause(err) != record.ErrNotFound { panic(err) } diff --git a/compact.go b/compact.go index 9443c99e..3e2652fd 100644 --- a/compact.go +++ b/compact.go @@ -35,6 +35,7 @@ import ( "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" ) // ExponentialBlockRanges returns the time ranges based on the stepSize. @@ -607,7 +608,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe } // Create an empty tombstones file. - if _, err := writeTombstoneFile(c.logger, tmp, newMemTombstones()); err != nil { + if _, err := record.WriteTombstoneFile(c.logger, tmp, record.NewMemTombstones()); err != nil { return errors.Wrap(err, "write new tombstones file") } @@ -876,15 +877,15 @@ type compactionSeriesSet struct { p index.Postings index IndexReader chunks ChunkReader - tombstones TombstoneReader + tombstones record.TombstoneReader l labels.Labels c []chunks.Meta - intervals Intervals + intervals record.Intervals err error } -func newCompactionSeriesSet(i IndexReader, c ChunkReader, t TombstoneReader, p index.Postings) *compactionSeriesSet { +func newCompactionSeriesSet(i IndexReader, c ChunkReader, t record.TombstoneReader, p index.Postings) *compactionSeriesSet { return &compactionSeriesSet{ index: i, chunks: c, @@ -914,7 +915,7 @@ func (c *compactionSeriesSet) Next() bool { if len(c.intervals) > 0 { chks := make([]chunks.Meta, 0, len(c.c)) for _, chk := range c.c { - if !(Interval{chk.MinTime, chk.MaxTime}.isSubrange(c.intervals)) { + if !(record.Interval{chk.MinTime, chk.MaxTime}.IsSubrange(c.intervals)) { chks = append(chks, chk) } } @@ -942,7 +943,7 @@ func (c *compactionSeriesSet) Err() error { return c.p.Err() } -func (c *compactionSeriesSet) At() (labels.Labels, []chunks.Meta, Intervals) { +func (c *compactionSeriesSet) At() (labels.Labels, []chunks.Meta, record.Intervals) { return c.l, c.c, c.intervals } @@ -952,7 +953,7 @@ type compactionMerger 
struct { aok, bok bool l labels.Labels c []chunks.Meta - intervals Intervals + intervals record.Intervals } func newCompactionMerger(a, b ChunkSeriesSet) (*compactionMerger, error) { @@ -1008,7 +1009,7 @@ func (c *compactionMerger) Next() bool { _, cb, rb := c.b.At() for _, r := range rb { - ra = ra.add(r) + ra = ra.Add(r) } c.l = append(c.l[:0], l...) @@ -1029,6 +1030,6 @@ func (c *compactionMerger) Err() error { return c.b.Err() } -func (c *compactionMerger) At() (labels.Labels, []chunks.Meta, Intervals) { +func (c *compactionMerger) At() (labels.Labels, []chunks.Meta, record.Intervals) { return c.l, c.c, c.intervals } diff --git a/compact_test.go b/compact_test.go index 18990ed5..bee741e6 100644 --- a/compact_test.go +++ b/compact_test.go @@ -30,6 +30,7 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" ) diff --git a/db_test.go b/db_test.go index 25fb8a7e..7e1e1b96 100644 --- a/db_test.go +++ b/db_test.go @@ -33,6 +33,7 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" "github.com/prometheus/tsdb/tsdbutil" "github.com/prometheus/tsdb/wal" @@ -196,7 +197,7 @@ func TestDBAppenderAddRef(t *testing.T) { testutil.Ok(t, err) err = app2.AddFast(9999999, 1, 1) - testutil.Equals(t, ErrNotFound, errors.Cause(err)) + testutil.Equals(t, record.ErrNotFound, errors.Cause(err)) testutil.Ok(t, app2.Commit()) @@ -243,27 +244,27 @@ func TestDeleteSimple(t *testing.T) { numSamples := int64(10) cases := []struct { - intervals Intervals + intervals record.Intervals remaint []int64 }{ { - intervals: Intervals{{0, 3}}, + intervals: record.Intervals{{0, 3}}, remaint: []int64{4, 5, 6, 7, 8, 9}, }, { - intervals: Intervals{{1, 3}}, + intervals: record.Intervals{{1, 3}}, remaint: []int64{0, 4, 5, 6, 
7, 8, 9}, }, { - intervals: Intervals{{1, 3}, {4, 7}}, + intervals: record.Intervals{{1, 3}, {4, 7}}, remaint: []int64{0, 8, 9}, }, { - intervals: Intervals{{1, 3}, {4, 700}}, + intervals: record.Intervals{{1, 3}, {4, 700}}, remaint: []int64{0}, }, { // This case is to ensure that labels and symbols are deleted. - intervals: Intervals{{0, 9}}, + intervals: record.Intervals{{0, 9}}, remaint: []int64{}, }, } @@ -359,7 +360,7 @@ func TestAmendDatapointCausesError(t *testing.T) { app = db.Appender() _, err = app.Add(labels.Labels{}, 0, 1) - testutil.Equals(t, ErrAmendSample, err) + testutil.Equals(t, record.ErrAmendSample, err) testutil.Ok(t, app.Rollback()) } @@ -393,7 +394,7 @@ func TestNonDuplicateNaNDatapointsCausesAmendError(t *testing.T) { app = db.Appender() _, err = app.Add(labels.Labels{}, 0, math.Float64frombits(0x7ff0000000000002)) - testutil.Equals(t, ErrAmendSample, err) + testutil.Equals(t, record.ErrAmendSample, err) } func TestSkippingInvalidValuesInSameTxn(t *testing.T) { @@ -561,11 +562,11 @@ func TestDB_SnapshotWithDelete(t *testing.T) { testutil.Ok(t, app.Commit()) cases := []struct { - intervals Intervals + intervals record.Intervals remaint []int64 }{ { - intervals: Intervals{{1, 3}, {4, 7}}, + intervals: record.Intervals{{1, 3}, {4, 7}}, remaint: []int64{0, 8, 9}, }, } @@ -888,11 +889,11 @@ func TestTombstoneClean(t *testing.T) { testutil.Ok(t, app.Commit()) cases := []struct { - intervals Intervals + intervals record.Intervals remaint []int64 }{ { - intervals: Intervals{{1, 3}, {4, 7}}, + intervals: record.Intervals{{1, 3}, {4, 7}}, remaint: []int64{0, 8, 9}, }, } @@ -964,7 +965,7 @@ func TestTombstoneClean(t *testing.T) { } for _, b := range db.Blocks() { - testutil.Equals(t, newMemTombstones(), b.tombstones) + testutil.Equals(t, record.NewMemTombstones(), b.tombstones) } } } @@ -990,8 +991,8 @@ func TestTombstoneCleanFail(t *testing.T) { block, err := OpenBlock(nil, blockDir, nil) testutil.Ok(t, err) // Add some some fake tombstones to trigger 
the compaction. - tomb := newMemTombstones() - tomb.addInterval(0, Interval{0, 1}) + tomb := record.NewMemTombstones() + tomb.AddInterval(0, record.Interval{0, 1}) block.tombstones = tomb db.blocks = append(db.blocks, block) @@ -1470,13 +1471,13 @@ func TestInitializeHeadTimestamp(t *testing.T) { w, err := wal.New(nil, nil, path.Join(dir, "wal"), false) testutil.Ok(t, err) - var enc RecordEncoder + var enc record.RecordEncoder err = w.Log( - enc.Series([]RefSeries{ + enc.Series([]record.RefSeries{ {Ref: 123, Labels: labels.FromStrings("a", "1")}, {Ref: 124, Labels: labels.FromStrings("a", "2")}, }, nil), - enc.Samples([]RefSample{ + enc.Samples([]record.RefSample{ {Ref: 123, T: 5000, V: 1}, {Ref: 124, T: 15000, V: 1}, }, nil), @@ -1520,13 +1521,13 @@ func TestInitializeHeadTimestamp(t *testing.T) { w, err := wal.New(nil, nil, path.Join(dir, "wal"), false) testutil.Ok(t, err) - var enc RecordEncoder + var enc record.RecordEncoder err = w.Log( - enc.Series([]RefSeries{ + enc.Series([]record.RefSeries{ {Ref: 123, Labels: labels.FromStrings("a", "1")}, {Ref: 124, Labels: labels.FromStrings("a", "2")}, }, nil), - enc.Samples([]RefSample{ + enc.Samples([]record.RefSample{ {Ref: 123, T: 5000, V: 1}, {Ref: 124, T: 15000, V: 1}, }, nil), diff --git a/go.mod b/go.mod index ccdd4372..c75e4ed7 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/oklog/ulid v1.3.1 github.com/pkg/errors v0.8.0 github.com/prometheus/client_golang v1.0.0 + github.com/prometheus/prometheus v2.5.0+incompatible golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5 gopkg.in/alecthomas/kingpin.v2 v2.2.6 diff --git a/go.sum b/go.sum index 365fa5ec..ad7f9516 100644 --- a/go.sum +++ b/go.sum @@ -27,11 +27,11 @@ github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1 
h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= -github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= -github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515 h1:T+h1c/A9Gawja4Y9mFVWj2vyii2bbUNDw3kt9VxK2EY= github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= @@ -59,6 +59,8 @@ github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d h1:GoAlyOgbOEIFd github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2 h1:6LJUbpNm42llc4HRCuvApCSWB/WfhuNo9K98Q9sNGfs= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/prometheus v2.5.0+incompatible h1:7QPitgO2kOFG8ecuRn9O/4L9+10He72rVRJvMXrE9Hg= +github.com/prometheus/prometheus v2.5.0+incompatible/go.mod h1:oAIUtOny2rjMX0OWN5vPR5/q/twIROJvdqnQKDdil/s= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod 
h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= diff --git a/head.go b/head.go index 0adb8847..c1c8e6d5 100644 --- a/head.go +++ b/head.go @@ -33,28 +33,18 @@ import ( "github.com/prometheus/tsdb/encoding" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/wal" ) var ( - // ErrNotFound is returned if a looked up resource was not found. - ErrNotFound = errors.Errorf("not found") - - // ErrOutOfOrderSample is returned if an appended sample has a - // timestamp smaller than the most recent sample. - ErrOutOfOrderSample = errors.New("out of order sample") - - // ErrAmendSample is returned if an appended sample has the same timestamp - // as the most recent sample but a different value. - ErrAmendSample = errors.New("amending sample") - // ErrOutOfBounds is returned if an appended sample is out of the // writable time range. ErrOutOfBounds = errors.New("out of bounds") // emptyTombstoneReader is a no-op Tombstone Reader. // This is used by head to satisfy the Tombstones() function call. - emptyTombstoneReader = newMemTombstones() + emptyTombstoneReader = record.NewMemTombstones() ) // Head handles reads and writes of time series data within a time window. @@ -256,12 +246,12 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int // Samples before the mint timestamp are discarded. func (h *Head) processWALSamples( minValidTime int64, - input <-chan []RefSample, output chan<- []RefSample, + input <-chan []record.RefSample, output chan<- []record.RefSample, ) (unknownRefs uint64) { defer close(output) // Mitigate lock contention in getByID. 
- refSeries := map[uint64]*memSeries{} + refSeries := map[uint64]*record.MemSeries{} mint, maxt := int64(math.MaxInt64), int64(math.MinInt64) @@ -279,7 +269,7 @@ func (h *Head) processWALSamples( } refSeries[s.Ref] = ms } - _, chunkCreated := ms.append(s.T, s.V) + _, chunkCreated := ms.Append(s.T, s.V) if chunkCreated { h.metrics.chunksCreated.Inc() h.metrics.chunks.Inc() @@ -331,8 +321,8 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { wg sync.WaitGroup multiRefLock sync.Mutex n = runtime.GOMAXPROCS(0) - inputs = make([]chan []RefSample, n) - outputs = make([]chan []RefSample, n) + inputs = make([]chan []record.RefSample, n) + outputs = make([]chan []record.RefSample, n) ) wg.Add(n) @@ -349,10 +339,10 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { }() for i := 0; i < n; i++ { - outputs[i] = make(chan []RefSample, 300) - inputs[i] = make(chan []RefSample, 300) + outputs[i] = make(chan []record.RefSample, 300) + inputs[i] = make(chan []record.RefSample, 300) - go func(input <-chan []RefSample, output chan<- []RefSample) { + go func(input <-chan []record.RefSample, output chan<- []record.RefSample) { unknown := h.processWALSamples(h.minValidTime, input, output) atomic.AddUint64(&unknownRefs, unknown) wg.Done() @@ -376,7 +366,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { rec := r.Record() switch dec.Type(rec) { - case RecordSeries: + case record.RecordSeries: series, err = dec.Series(rec, series) if err != nil { return &wal.CorruptionErr{ @@ -399,7 +389,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { h.lastSeriesID = s.Ref } } - case RecordSamples: + case record.RecordSamples: samples, err = dec.Samples(rec, samples) s := samples if err != nil { @@ -418,9 +408,9 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { if len(samples) < m { m = len(samples) } - shards := make([][]RefSample, n) + 
shards := make([][]record.RefSample, n) for i := 0; i < n; i++ { - var buf []RefSample + var buf []record.RefSample select { case buf = <-outputs[i]: default: @@ -440,7 +430,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { samples = samples[m:] } samples = s // Keep whole slice for reuse. - case RecordTombstones: + case record.RecordTombstones: tstones, err = dec.Tombstones(rec, tstones) if err != nil { return &wal.CorruptionErr{ @@ -450,15 +440,15 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { } } for _, s := range tstones { - for _, itv := range s.intervals { + for _, itv := range s.Intervals { if itv.Maxt < h.minValidTime { continue } - if m := h.series.getByID(s.ref); m == nil { + if m := h.series.getByID(s.Ref); m == nil { unknownRefs++ continue } - allStones.addInterval(s.ref, itv) + allStones.AddInterval(s.Ref, itv) } } default: @@ -508,8 +498,8 @@ func (h *Head) Init(minValidTime int64) error { level.Info(h.logger).Log("msg", "replaying WAL, this may take awhile") // Backfill the checkpoint first if it exists. 
- dir, startFrom, err := LastCheckpoint(h.wal.Dir()) - if err != nil && err != ErrNotFound { + dir, startFrom, err := wal.LastCheckpoint(h.wal.Dir()) + if err != nil && err != record.ErrNotFound { return errors.Wrap(err, "find last checkpoint") } multiRef := map[uint64]uint64{} @@ -629,7 +619,7 @@ func (h *Head) Truncate(mint int64) (err error) { return ok } h.metrics.checkpointCreationTotal.Inc() - if _, err = Checkpoint(h.wal, first, last, keep, mint); err != nil { + if _, err = wal.Checkpoint(h.wal, first, last, keep, mint); err != nil { h.metrics.checkpointCreationFail.Inc() return errors.Wrap(err, "create checkpoint") } @@ -651,7 +641,7 @@ func (h *Head) Truncate(mint int64) (err error) { h.deletedMtx.Unlock() h.metrics.checkpointDeleteTotal.Inc() - if err := DeleteCheckpoints(h.wal.Dir(), last); err != nil { + if err := wal.DeleteCheckpoints(h.wal.Dir(), last); err != nil { // Leftover old checkpoints do not cause problems down the line beyond // occupying disk space. // They will just be ignored since a higher checkpoint exists. 
@@ -693,7 +683,7 @@ func (h *rangeHead) Chunks() (ChunkReader, error) { return h.head.chunksRange(h.mint, h.maxt), nil } -func (h *rangeHead) Tombstones() (TombstoneReader, error) { +func (h *rangeHead) Tombstones() (record.TombstoneReader, error) { return emptyTombstoneReader, nil } @@ -739,7 +729,7 @@ func (a *initAppender) Add(lset labels.Labels, t int64, v float64) (uint64, erro func (a *initAppender) AddFast(ref uint64, t int64, v float64) error { if a.app == nil { - return ErrNotFound + return record.ErrNotFound } return a.app.AddFast(ref, t, v) } @@ -789,15 +779,15 @@ func max(a, b int64) int64 { return b } -func (h *Head) getAppendBuffer() []RefSample { +func (h *Head) getAppendBuffer() []record.RefSample { b := h.appendPool.Get() if b == nil { - return make([]RefSample, 0, 512) + return make([]record.RefSample, 0, 512) } - return b.([]RefSample) + return b.([]record.RefSample) } -func (h *Head) putAppendBuffer(b []RefSample) { +func (h *Head) putAppendBuffer(b []record.RefSample) { //lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty. h.appendPool.Put(b[:0]) } @@ -820,8 +810,8 @@ type headAppender struct { minValidTime int64 // No samples below this timestamp are allowed. 
mint, maxt int64 - series []RefSeries - samples []RefSample + series []record.RefSeries + samples []record.RefSample } func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) { @@ -834,12 +824,12 @@ func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, erro s, created := a.head.getOrCreate(lset.Hash(), lset) if created { - a.series = append(a.series, RefSeries{ - Ref: s.ref, + a.series = append(a.series, record.RefSeries{ + Ref: s.Ref, Labels: lset, }) } - return s.ref, a.AddFast(s.ref, t, v) + return s.Ref, a.AddFast(s.Ref, t, v) } func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { @@ -849,14 +839,14 @@ func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { s := a.head.series.getByID(ref) if s == nil { - return errors.Wrap(ErrNotFound, "unknown series") + return errors.Wrap(record.ErrNotFound, "unknown series") } s.Lock() - if err := s.appendable(t, v); err != nil { + if err := s.Appendable(t, v); err != nil { s.Unlock() return err } - s.pendingCommit = true + s.PendingCommit = true s.Unlock() if t < a.mint { @@ -866,11 +856,11 @@ func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { a.maxt = t } - a.samples = append(a.samples, RefSample{ + a.samples = append(a.samples, record.RefSample{ Ref: ref, T: t, V: v, - series: s, + Series: s, }) return nil } @@ -884,7 +874,7 @@ func (a *headAppender) log() error { defer func() { a.head.putBytesBuffer(buf) }() var rec []byte - var enc RecordEncoder + var enc record.RecordEncoder if len(a.series) > 0 { rec = enc.Series(a.series, buf) @@ -916,10 +906,10 @@ func (a *headAppender) Commit() error { total := len(a.samples) for _, s := range a.samples { - s.series.Lock() - ok, chunkCreated := s.series.append(s.T, s.V) - s.series.pendingCommit = false - s.series.Unlock() + s.Series.Lock() + ok, chunkCreated := s.Series.Append(s.T, s.V) + s.Series.PendingCommit = false + s.Series.Unlock() if !ok { total-- @@ -939,9 +929,9 @@ func 
(a *headAppender) Commit() error { func (a *headAppender) Rollback() error { a.head.metrics.activeAppenders.Dec() for _, s := range a.samples { - s.series.Lock() - s.series.pendingCommit = false - s.series.Unlock() + s.Series.Lock() + s.Series.PendingCommit = false + s.Series.Unlock() } a.head.putAppendBuffer(a.samples) @@ -964,21 +954,21 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { return errors.Wrap(err, "select series") } - var stones []Stone + var stones []record.Stone dirty := false for p.Next() { series := h.series.getByID(p.At()) - t0, t1 := series.minTime(), series.maxTime() + t0, t1 := series.MinTime(), series.MaxTime() if t0 == math.MinInt64 || t1 == math.MinInt64 { continue } // Delete only until the current values and not beyond. t0, t1 = clampInterval(mint, maxt, t0, t1) if h.wal != nil { - stones = append(stones, Stone{p.At(), Intervals{{t0, t1}}}) + stones = append(stones, record.Stone{p.At(), record.Intervals{{t0, t1}}}) } - if err := h.chunkRewrite(p.At(), Intervals{{t0, t1}}); err != nil { + if err := h.chunkRewrite(p.At(), record.Intervals{{t0, t1}}); err != nil { return errors.Wrap(err, "delete samples") } dirty = true @@ -986,7 +976,7 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { if p.Err() != nil { return p.Err() } - var enc RecordEncoder + var enc record.RecordEncoder if h.wal != nil { // Although we don't store the stones in the head // we need to write them to the WAL to mark these as deleted @@ -1005,7 +995,7 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { // chunkRewrite re-writes the chunks which overlaps with deleted ranges // and removes the samples in the deleted ranges. // Chunks is deleted if no samples are left at the end. 
-func (h *Head) chunkRewrite(ref uint64, dranges Intervals) (err error) { +func (h *Head) chunkRewrite(ref uint64, dranges record.Intervals) (err error) { if len(dranges) == 0 { return nil } @@ -1013,18 +1003,18 @@ func (h *Head) chunkRewrite(ref uint64, dranges Intervals) (err error) { ms := h.series.getByID(ref) ms.Lock() defer ms.Unlock() - if len(ms.chunks) == 0 { + if len(ms.Chunks) == 0 { return nil } - metas := ms.chunksMetas() + metas := ms.ChunksMetas() mint, maxt := metas[0].MinTime, metas[len(metas)-1].MaxTime it := newChunkSeriesIterator(metas, dranges, mint, maxt) - ms.reset() + ms.Reset() for it.Next() { t, v := it.At() - ok, _ := ms.append(t, v) + ok, _ := ms.Append(t, v) if !ok { level.Warn(h.logger).Log("msg", "failed to add sample during delete") } @@ -1097,7 +1087,7 @@ func (h *Head) gc() { } // Tombstones returns a new reader over the head's tombstones -func (h *Head) Tombstones() (TombstoneReader, error) { +func (h *Head) Tombstones() (record.TombstoneReader, error) { return emptyTombstoneReader, nil } @@ -1202,22 +1192,22 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { s := h.head.series.getByID(sid) // This means that the series has been garbage collected. if s == nil { - return nil, ErrNotFound + return nil, record.ErrNotFound } s.Lock() - c := s.chunk(int(cid)) + c := s.Chunk(int(cid)) // This means that the chunk has been garbage collected or is outside // the specified range. 
if c == nil || !c.OverlapsClosedInterval(h.mint, h.maxt) { s.Unlock() - return nil, ErrNotFound + return nil, record.ErrNotFound } s.Unlock() return &safeChunk{ - Chunk: c.chunk, + Chunk: c.Chunk, s: s, cid: int(cid), }, nil @@ -1225,7 +1215,7 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { type safeChunk struct { chunkenc.Chunk - s *memSeries + s *record.MemSeries cid int } @@ -1295,7 +1285,7 @@ func (h *headIndexReader) Postings(name, value string) (index.Postings, error) { } func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { - series := make([]*memSeries, 0, 128) + series := make([]*record.MemSeries, 0, 128) // Fetch all the series only once. for p.Next() { @@ -1311,13 +1301,13 @@ func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { } sort.Slice(series, func(i, j int) bool { - return labels.Compare(series[i].lset, series[j].lset) < 0 + return labels.Compare(series[i].Lset, series[j].Lset) < 0 }) // Convert back to list. ep := make([]uint64, 0, len(series)) for _, p := range series { - ep = append(ep, p.ref) + ep = append(ep, p.Ref) } return index.NewListPostings(ep) } @@ -1328,16 +1318,16 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks if s == nil { h.head.metrics.seriesNotFound.Inc() - return ErrNotFound + return record.ErrNotFound } - *lbls = append((*lbls)[:0], s.lset...) + *lbls = append((*lbls)[:0], s.Lset...) s.Lock() defer s.Unlock() *chks = (*chks)[:0] - for i, c := range s.chunks { + for i, c := range s.Chunks { // Do not expose chunks that are outside of the specified range. 
if !c.OverlapsClosedInterval(h.mint, h.maxt) { continue @@ -1368,7 +1358,7 @@ func (h *headIndexReader) LabelIndices() ([][]string, error) { return res, nil } -func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool) { +func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*record.MemSeries, bool) { // Just using `getOrSet` below would be semantically sufficient, but we'd create // a new series on every sample inserted via Add(), which causes allocations // and makes our series IDs rather random and harder to compress in postings. @@ -1383,8 +1373,8 @@ func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool) { return h.getOrCreateWithID(id, hash, lset) } -func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSeries, bool) { - s := newMemSeries(lset, id, h.chunkRange) +func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*record.MemSeries, bool) { + s := record.NewMemSeries(lset, id, h.chunkRange) s, created := h.series.getOrSet(hash, s) if !created { @@ -1418,21 +1408,21 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSerie // on top of a regular hashmap and holds a slice of series to resolve hash collisions. // Its methods require the hash to be submitted with it to avoid re-computations throughout // the code. 
-type seriesHashmap map[uint64][]*memSeries +type seriesHashmap map[uint64][]*record.MemSeries -func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries { +func (m seriesHashmap) get(hash uint64, lset labels.Labels) *record.MemSeries { for _, s := range m[hash] { - if s.lset.Equals(lset) { + if s.Lset.Equals(lset) { return s } } return nil } -func (m seriesHashmap) set(hash uint64, s *memSeries) { +func (m seriesHashmap) set(hash uint64, s *record.MemSeries) { l := m[hash] for i, prev := range l { - if prev.lset.Equals(s.lset) { + if prev.Lset.Equals(s.Lset) { l[i] = s return } @@ -1441,9 +1431,9 @@ func (m seriesHashmap) set(hash uint64, s *memSeries) { } func (m seriesHashmap) del(hash uint64, lset labels.Labels) { - var rem []*memSeries + var rem []*record.MemSeries for _, s := range m[hash] { - if !s.lset.Equals(lset) { + if !s.Lset.Equals(lset) { rem = append(rem, s) } } @@ -1459,7 +1449,7 @@ func (m seriesHashmap) del(hash uint64, lset labels.Labels) { // with the maps was profiled to be slower – likely due to the additional pointer // dereferences. 
type stripeSeries struct { - series [stripeSize]map[uint64]*memSeries + series [stripeSize]map[uint64]*record.MemSeries hashes [stripeSize]seriesHashmap locks [stripeSize]stripeLock } @@ -1479,7 +1469,7 @@ func newStripeSeries() *stripeSeries { s := &stripeSeries{} for i := range s.series { - s.series[i] = map[uint64]*memSeries{} + s.series[i] = map[uint64]*record.MemSeries{} } for i := range s.hashes { s.hashes[i] = seriesHashmap{} @@ -1502,9 +1492,9 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { for hash, all := range s.hashes[i] { for _, series := range all { series.Lock() - rmChunks += series.truncateChunksBefore(mint) + rmChunks += series.TruncateChunksBefore(mint) - if len(series.chunks) > 0 || series.pendingCommit { + if len(series.Chunks) > 0 || series.PendingCommit { series.Unlock() continue } @@ -1514,15 +1504,15 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { // series alike. // If we don't hold them all, there's a very small chance that a series receives // samples again while we are half-way into deleting it. 
- j := int(series.ref & stripeMask) + j := int(series.Ref & stripeMask) if i != j { s.locks[j].Lock() } - deleted[series.ref] = struct{}{} - s.hashes[i].del(hash, series.lset) - delete(s.series[j], series.ref) + deleted[series.Ref] = struct{}{} + s.hashes[i].del(hash, series.Lset) + delete(s.series[j], series.Ref) if i != j { s.locks[j].Unlock() @@ -1538,7 +1528,7 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { return deleted, rmChunks } -func (s *stripeSeries) getByID(id uint64) *memSeries { +func (s *stripeSeries) getByID(id uint64) *record.MemSeries { i := id & stripeMask s.locks[i].RLock() @@ -1548,7 +1538,7 @@ func (s *stripeSeries) getByID(id uint64) *memSeries { return series } -func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *memSeries { +func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *record.MemSeries { i := hash & stripeMask s.locks[i].RLock() @@ -1558,220 +1548,27 @@ func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *memSeries { return series } -func (s *stripeSeries) getOrSet(hash uint64, series *memSeries) (*memSeries, bool) { +func (s *stripeSeries) getOrSet(hash uint64, series *record.MemSeries) (*record.MemSeries, bool) { i := hash & stripeMask s.locks[i].Lock() - if prev := s.hashes[i].get(hash, series.lset); prev != nil { + if prev := s.hashes[i].get(hash, series.Lset); prev != nil { s.locks[i].Unlock() return prev, false } s.hashes[i].set(hash, series) s.locks[i].Unlock() - i = series.ref & stripeMask + i = series.Ref & stripeMask s.locks[i].Lock() - s.series[i][series.ref] = series + s.series[i][series.Ref] = series s.locks[i].Unlock() return series, true } -type sample struct { - t int64 - v float64 -} - -func (s sample) T() int64 { - return s.t -} - -func (s sample) V() float64 { - return s.v -} - -// memSeries is the in-memory representation of a series. None of its methods -// are goroutine safe and it is the caller's responsibility to lock it. 
-type memSeries struct { - sync.Mutex - - ref uint64 - lset labels.Labels - chunks []*memChunk - headChunk *memChunk - chunkRange int64 - firstChunkID int - - nextAt int64 // Timestamp at which to cut the next chunk. - sampleBuf [4]sample - pendingCommit bool // Whether there are samples waiting to be committed to this series. - - app chunkenc.Appender // Current appender for the chunk. -} - -func newMemSeries(lset labels.Labels, id uint64, chunkRange int64) *memSeries { - s := &memSeries{ - lset: lset, - ref: id, - chunkRange: chunkRange, - nextAt: math.MinInt64, - } - return s -} - -func (s *memSeries) minTime() int64 { - if len(s.chunks) == 0 { - return math.MinInt64 - } - return s.chunks[0].minTime -} - -func (s *memSeries) maxTime() int64 { - c := s.head() - if c == nil { - return math.MinInt64 - } - return c.maxTime -} - -func (s *memSeries) cut(mint int64) *memChunk { - c := &memChunk{ - chunk: chunkenc.NewXORChunk(), - minTime: mint, - maxTime: math.MinInt64, - } - s.chunks = append(s.chunks, c) - s.headChunk = c - - // Set upper bound on when the next chunk must be started. An earlier timestamp - // may be chosen dynamically at a later point. - s.nextAt = rangeForTimestamp(mint, s.chunkRange) - - app, err := c.chunk.Appender() - if err != nil { - panic(err) - } - s.app = app - return c -} - -func (s *memSeries) chunksMetas() []chunks.Meta { - metas := make([]chunks.Meta, 0, len(s.chunks)) - for _, chk := range s.chunks { - metas = append(metas, chunks.Meta{Chunk: chk.chunk, MinTime: chk.minTime, MaxTime: chk.maxTime}) - } - return metas -} - -// reset re-initialises all the variable in the memSeries except 'lset', 'ref', -// and 'chunkRange', like how it would appear after 'newMemSeries(...)'. 
-func (s *memSeries) reset() { - s.chunks = nil - s.headChunk = nil - s.firstChunkID = 0 - s.nextAt = math.MinInt64 - s.sampleBuf = [4]sample{} - s.pendingCommit = false - s.app = nil -} - -// appendable checks whether the given sample is valid for appending to the series. -func (s *memSeries) appendable(t int64, v float64) error { - c := s.head() - if c == nil { - return nil - } - - if t > c.maxTime { - return nil - } - if t < c.maxTime { - return ErrOutOfOrderSample - } - // We are allowing exact duplicates as we can encounter them in valid cases - // like federation and erroring out at that time would be extremely noisy. - if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) { - return ErrAmendSample - } - return nil -} - -func (s *memSeries) chunk(id int) *memChunk { - ix := id - s.firstChunkID - if ix < 0 || ix >= len(s.chunks) { - return nil - } - return s.chunks[ix] -} - -func (s *memSeries) chunkID(pos int) int { - return pos + s.firstChunkID -} - -// truncateChunksBefore removes all chunks from the series that have not timestamp -// at or after mint. Chunk IDs remain unchanged. -func (s *memSeries) truncateChunksBefore(mint int64) (removed int) { - var k int - for i, c := range s.chunks { - if c.maxTime >= mint { - break - } - k = i + 1 - } - s.chunks = append(s.chunks[:0], s.chunks[k:]...) - s.firstChunkID += k - if len(s.chunks) == 0 { - s.headChunk = nil - } else { - s.headChunk = s.chunks[len(s.chunks)-1] - } - - return k -} - -// append adds the sample (t, v) to the series. -func (s *memSeries) append(t int64, v float64) (success, chunkCreated bool) { - // Based on Gorilla white papers this offers near-optimal compression ratio - // so anything bigger that this has diminishing returns and increases - // the time range within which we have to decompress all samples. - const samplesPerChunk = 120 - - c := s.head() - - if c == nil { - c = s.cut(t) - chunkCreated = true - } - numSamples := c.chunk.NumSamples() - - // Out of order sample. 
- if c.maxTime >= t { - return false, chunkCreated - } - // If we reach 25% of a chunk's desired sample count, set a definitive time - // at which to start the next chunk. - // At latest it must happen at the timestamp set when the chunk was cut. - if numSamples == samplesPerChunk/4 { - s.nextAt = computeChunkEndTime(c.minTime, c.maxTime, s.nextAt) - } - if t >= s.nextAt { - c = s.cut(t) - chunkCreated = true - } - s.app.Append(t, v) - - c.maxTime = t - - s.sampleBuf[0] = s.sampleBuf[1] - s.sampleBuf[1] = s.sampleBuf[2] - s.sampleBuf[2] = s.sampleBuf[3] - s.sampleBuf[3] = sample{t: t, v: v} - - return true, chunkCreated -} - // computeChunkEndTime estimates the end timestamp based the beginning of a chunk, // its current timestamp and the upper bound up to which we insert data. // It assumes that the time range is 1/4 full. diff --git a/head_test.go b/head_test.go index 040ae828..50b66196 100644 --- a/head_test.go +++ b/head_test.go @@ -30,6 +30,7 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" "github.com/prometheus/tsdb/tsdbutil" "github.com/prometheus/tsdb/wal" @@ -51,14 +52,14 @@ func BenchmarkCreateSeries(b *testing.B) { } func populateTestWAL(t testing.TB, w *wal.WAL, recs []interface{}) { - var enc RecordEncoder + var enc record.RecordEncoder for _, r := range recs { switch v := r.(type) { - case []RefSeries: + case []record.RefSeries: testutil.Ok(t, w.Log(enc.Series(v, nil))) - case []RefSample: + case []record.RefSample: testutil.Ok(t, w.Log(enc.Samples(v, nil))) - case []Stone: + case []record.Stone: testutil.Ok(t, w.Log(enc.Tombstones(v, nil))) } } @@ -69,22 +70,22 @@ func readTestWAL(t testing.TB, dir string) (recs []interface{}) { testutil.Ok(t, err) defer sr.Close() - var dec RecordDecoder + var dec record.RecordDecoder r := wal.NewReader(sr) for r.Next() { rec := r.Record() switch dec.Type(rec) { - case 
RecordSeries: + case record.RecordSeries: series, err := dec.Series(rec, nil) testutil.Ok(t, err) recs = append(recs, series) - case RecordSamples: + case record.RecordSamples: samples, err := dec.Samples(rec, nil) testutil.Ok(t, err) recs = append(recs, samples) - case RecordTombstones: + case record.RecordTombstones: tstones, err := dec.Tombstones(rec, nil) testutil.Ok(t, err) recs = append(recs, tstones) @@ -223,38 +224,38 @@ func TestHead_Truncate(t *testing.T) { s3, _ := h.getOrCreate(3, labels.FromStrings("a", "1", "b", "2")) s4, _ := h.getOrCreate(4, labels.FromStrings("a", "2", "b", "2", "c", "1")) - s1.chunks = []*memChunk{ - {minTime: 0, maxTime: 999}, - {minTime: 1000, maxTime: 1999}, - {minTime: 2000, maxTime: 2999}, + s1.Chunks = []*record.MemChunk{ + {MinTime: 0, MaxTime: 999}, + {MinTime: 1000, MaxTime: 1999}, + {MinTime: 2000, MaxTime: 2999}, } - s2.chunks = []*memChunk{ - {minTime: 1000, maxTime: 1999}, - {minTime: 2000, maxTime: 2999}, - {minTime: 3000, maxTime: 3999}, + s2.Chunks = []*record.MemChunk{ + {MinTime: 1000, MaxTime: 1999}, + {MinTime: 2000, MaxTime: 2999}, + {MinTime: 3000, MaxTime: 3999}, } - s3.chunks = []*memChunk{ - {minTime: 0, maxTime: 999}, - {minTime: 1000, maxTime: 1999}, + s3.Chunks = []*record.MemChunk{ + {MinTime: 0, MaxTime: 999}, + {MinTime: 1000, MaxTime: 1999}, } - s4.chunks = []*memChunk{} + s4.Chunks = []*record.MemChunk{} // Truncation need not be aligned. 
testutil.Ok(t, h.Truncate(1)) testutil.Ok(t, h.Truncate(2000)) - testutil.Equals(t, []*memChunk{ - {minTime: 2000, maxTime: 2999}, - }, h.series.getByID(s1.ref).chunks) + testutil.Equals(t, []*record.MemChunk{ + {MinTime: 2000, MaxTime: 2999}, + }, h.series.getByID(s1.Ref).Chunks) - testutil.Equals(t, []*memChunk{ - {minTime: 2000, maxTime: 2999}, - {minTime: 3000, maxTime: 3999}, - }, h.series.getByID(s2.ref).chunks) + testutil.Equals(t, []*record.MemChunk{ + {MinTime: 2000, MaxTime: 2999}, + {MinTime: 3000, MaxTime: 3999}, + }, h.series.getByID(s2.Ref).Chunks) - testutil.Assert(t, h.series.getByID(s3.ref) == nil, "") - testutil.Assert(t, h.series.getByID(s4.ref) == nil, "") + testutil.Assert(t, h.series.getByID(s3.Ref) == nil, "") + testutil.Assert(t, h.series.getByID(s4.Ref) == nil, "") postingsA1, _ := index.ExpandPostings(h.postings.Get("a", "1")) postingsA2, _ := index.ExpandPostings(h.postings.Get("a", "2")) @@ -263,10 +264,10 @@ func TestHead_Truncate(t *testing.T) { postingsC1, _ := index.ExpandPostings(h.postings.Get("c", "1")) postingsAll, _ := index.ExpandPostings(h.postings.Get("", "")) - testutil.Equals(t, []uint64{s1.ref}, postingsA1) - testutil.Equals(t, []uint64{s2.ref}, postingsA2) - testutil.Equals(t, []uint64{s1.ref, s2.ref}, postingsB1) - testutil.Equals(t, []uint64{s1.ref, s2.ref}, postingsAll) + testutil.Equals(t, []uint64{s1.Ref}, postingsA1) + testutil.Equals(t, []uint64{s2.Ref}, postingsA2) + testutil.Equals(t, []uint64{s1.Ref, s2.Ref}, postingsB1) + testutil.Equals(t, []uint64{s1.Ref, s2.Ref}, postingsAll) testutil.Assert(t, postingsB2 == nil, "") testutil.Assert(t, postingsC1 == nil, "") @@ -288,28 +289,28 @@ func TestHead_Truncate(t *testing.T) { // Validate various behaviors brought on by firstChunkID accounting for // garbage collected chunks. 
func TestMemSeries_truncateChunks(t *testing.T) { - s := newMemSeries(labels.FromStrings("a", "b"), 1, 2000) + s := record.NewMemSeries(labels.FromStrings("a", "b"), 1, 2000) for i := 0; i < 4000; i += 5 { - ok, _ := s.append(int64(i), float64(i)) + ok, _ := s.Append(int64(i), float64(i)) testutil.Assert(t, ok == true, "sample append failed") } // Check that truncate removes half of the chunks and afterwards // that the ID of the last chunk still gives us the same chunk afterwards. - countBefore := len(s.chunks) - lastID := s.chunkID(countBefore - 1) - lastChunk := s.chunk(lastID) + countBefore := len(s.Chunks) + lastID := s.ChunkID(countBefore - 1) + lastChunk := s.Chunk(lastID) - testutil.Assert(t, s.chunk(0) != nil, "") + testutil.Assert(t, s.Chunk(0) != nil, "") testutil.Assert(t, lastChunk != nil, "") - s.truncateChunksBefore(2000) + s.TruncateChunksBefore(2000) - testutil.Equals(t, int64(2000), s.chunks[0].minTime) - testutil.Assert(t, s.chunk(0) == nil, "first chunks not gone") - testutil.Equals(t, countBefore/2, len(s.chunks)) - testutil.Equals(t, lastChunk, s.chunk(lastID)) + testutil.Equals(t, int64(2000), s.Chunks[0].MinTime) + testutil.Assert(t, s.Chunk(0) == nil, "first chunks not gone") + testutil.Equals(t, countBefore/2, len(s.Chunks)) + testutil.Equals(t, lastChunk, s.Chunk(lastID)) // Validate that the series' sample buffer is applied correctly to the last chunk // after truncation. 
@@ -371,27 +372,27 @@ func TestHeadDeleteSimple(t *testing.T) { lblDefault := labels.Label{"a", "b"} cases := []struct { - dranges Intervals + dranges record.Intervals smplsExp []sample }{ { - dranges: Intervals{{0, 3}}, + dranges: record.Intervals{{0, 3}}, smplsExp: buildSmpls([]int64{4, 5, 6, 7, 8, 9}), }, { - dranges: Intervals{{1, 3}}, + dranges: record.Intervals{{1, 3}}, smplsExp: buildSmpls([]int64{0, 4, 5, 6, 7, 8, 9}), }, { - dranges: Intervals{{1, 3}, {4, 7}}, + dranges: record.Intervals{{1, 3}, {4, 7}}, smplsExp: buildSmpls([]int64{0, 8, 9}), }, { - dranges: Intervals{{1, 3}, {4, 700}}, + dranges: record.Intervals{{1, 3}, {4, 700}}, smplsExp: buildSmpls([]int64{0}), }, { // This case is to ensure that labels and symbols are deleted. - dranges: Intervals{{0, 9}}, + dranges: record.Intervals{{0, 9}}, smplsExp: buildSmpls([]int64{}), }, } @@ -591,7 +592,7 @@ func TestDeletedSamplesAndSeriesStillInWALAfterCheckpoint(t *testing.T) { testutil.Ok(t, hb.Close()) // Confirm there's been a checkpoint. - cdir, _, err := LastCheckpoint(dir) + cdir, _, err := wal.LastCheckpoint(dir) testutil.Ok(t, err) // Read in checkpoint and WAL. recs := readTestWAL(t, cdir) @@ -600,11 +601,11 @@ func TestDeletedSamplesAndSeriesStillInWALAfterCheckpoint(t *testing.T) { var series, samples, stones int for _, rec := range recs { switch rec.(type) { - case []RefSeries: + case []record.RefSeries: series++ - case []RefSample: + case []record.RefSample: samples++ - case []Stone: + case []record.Stone: stones++ default: t.Fatalf("unknown record type") @@ -692,18 +693,18 @@ func TestDelete_e2e(t *testing.T) { // Delete a time-range from each-selector. 
dels := []struct { ms []labels.Matcher - drange Intervals + drange record.Intervals }{ { ms: []labels.Matcher{labels.NewEqualMatcher("a", "b")}, - drange: Intervals{{300, 500}, {600, 670}}, + drange: record.Intervals{{300, 500}, {600, 670}}, }, { ms: []labels.Matcher{ labels.NewEqualMatcher("a", "b"), labels.NewEqualMatcher("job", "prom-k8s"), }, - drange: Intervals{{300, 500}, {100, 670}}, + drange: record.Intervals{{300, 500}, {100, 670}}, }, { ms: []labels.Matcher{ @@ -711,7 +712,7 @@ func TestDelete_e2e(t *testing.T) { labels.NewEqualMatcher("instance", "localhost:9090"), labels.NewEqualMatcher("job", "prometheus"), }, - drange: Intervals{{300, 400}, {100, 6700}}, + drange: record.Intervals{{300, 400}, {100, 6700}}, }, // TODO: Add Regexp Matchers. } @@ -794,12 +795,12 @@ func boundedSamples(full []tsdbutil.Sample, mint, maxt int64) []tsdbutil.Sample return full } -func deletedSamples(full []tsdbutil.Sample, dranges Intervals) []tsdbutil.Sample { +func deletedSamples(full []tsdbutil.Sample, dranges record.Intervals) []tsdbutil.Sample { ds := make([]tsdbutil.Sample, 0, len(full)) Outer: for _, s := range full { for _, r := range dranges { - if r.inBounds(s.T()) { + if r.InBounds(s.T()) { continue Outer } } @@ -852,42 +853,42 @@ func TestComputeChunkEndTime(t *testing.T) { } func TestMemSeries_append(t *testing.T) { - s := newMemSeries(labels.Labels{}, 1, 500) + s := record.NewMemSeries(labels.Labels{}, 1, 500) // Add first two samples at the very end of a chunk range and the next two // on and after it. // New chunk must correctly be cut at 1000. 
- ok, chunkCreated := s.append(998, 1) + ok, chunkCreated := s.Append(998, 1) testutil.Assert(t, ok, "append failed") testutil.Assert(t, chunkCreated, "first sample created chunk") - ok, chunkCreated = s.append(999, 2) + ok, chunkCreated = s.Append(999, 2) testutil.Assert(t, ok, "append failed") testutil.Assert(t, !chunkCreated, "second sample should use same chunk") - ok, chunkCreated = s.append(1000, 3) + ok, chunkCreated = s.Append(1000, 3) testutil.Assert(t, ok, "append failed") testutil.Assert(t, chunkCreated, "expected new chunk on boundary") - ok, chunkCreated = s.append(1001, 4) + ok, chunkCreated = s.Append(1001, 4) testutil.Assert(t, ok, "append failed") testutil.Assert(t, !chunkCreated, "second sample should use same chunk") - testutil.Assert(t, s.chunks[0].minTime == 998 && s.chunks[0].maxTime == 999, "wrong chunk range") - testutil.Assert(t, s.chunks[1].minTime == 1000 && s.chunks[1].maxTime == 1001, "wrong chunk range") + testutil.Assert(t, s.Chunks[0].MinTime == 998 && s.Chunks[0].MaxTime == 999, "wrong chunk range") + testutil.Assert(t, s.Chunks[1].MinTime == 1000 && s.Chunks[1].MaxTime == 1001, "wrong chunk range") // Fill the range [1000,2000) with many samples. Intermediate chunks should be cut // at approximately 120 samples per chunk. for i := 1; i < 1000; i++ { - ok, _ := s.append(1001+int64(i), float64(i)) + ok, _ := s.Append(1001+int64(i), float64(i)) testutil.Assert(t, ok, "append failed") } - testutil.Assert(t, len(s.chunks) > 7, "expected intermediate chunks") + testutil.Assert(t, len(s.Chunks) > 7, "expected intermediate chunks") // All chunks but the first and last should now be moderately full. 
- for i, c := range s.chunks[1 : len(s.chunks)-1] { - testutil.Assert(t, c.chunk.NumSamples() > 100, "unexpected small chunk %d of length %d", i, c.chunk.NumSamples()) + for i, c := range s.Chunks[1 : len(s.Chunks)-1] { + testutil.Assert(t, c.Chunk.NumSamples() > 100, "unexpected small chunk %d of length %d", i, c.Chunk.NumSamples()) } } @@ -900,9 +901,9 @@ func TestGCChunkAccess(t *testing.T) { h.initTime(0) s, _ := h.getOrCreate(1, labels.FromStrings("a", "1")) - s.chunks = []*memChunk{ - {minTime: 0, maxTime: 999}, - {minTime: 1000, maxTime: 1999}, + s.Chunks = []*record.MemChunk{ + {MinTime: 0, MaxTime: 999}, + {MinTime: 1000, MaxTime: 1999}, } idx := h.indexRange(0, 1500) @@ -926,7 +927,7 @@ func TestGCChunkAccess(t *testing.T) { testutil.Ok(t, h.Truncate(1500)) // Remove a chunk. _, err = cr.Chunk(chunks[0].Ref) - testutil.Equals(t, ErrNotFound, err) + testutil.Equals(t, record.ErrNotFound, err) _, err = cr.Chunk(chunks[1].Ref) testutil.Ok(t, err) } @@ -940,9 +941,9 @@ func TestGCSeriesAccess(t *testing.T) { h.initTime(0) s, _ := h.getOrCreate(1, labels.FromStrings("a", "1")) - s.chunks = []*memChunk{ - {minTime: 0, maxTime: 999}, - {minTime: 1000, maxTime: 1999}, + s.Chunks = []*record.MemChunk{ + {MinTime: 0, MaxTime: 999}, + {MinTime: 1000, MaxTime: 1999}, } idx := h.indexRange(0, 2000) @@ -965,12 +966,12 @@ func TestGCSeriesAccess(t *testing.T) { testutil.Ok(t, h.Truncate(2000)) // Remove the series. 
- testutil.Equals(t, (*memSeries)(nil), h.series.getByID(1)) + testutil.Equals(t, (*record.MemSeries)(nil), h.series.getByID(1)) _, err = cr.Chunk(chunks[0].Ref) - testutil.Equals(t, ErrNotFound, err) + testutil.Equals(t, record.ErrNotFound, err) _, err = cr.Chunk(chunks[1].Ref) - testutil.Equals(t, ErrNotFound, err) + testutil.Equals(t, record.ErrNotFound, err) } func TestUncommittedSamplesNotLostOnTruncate(t *testing.T) { @@ -1028,7 +1029,7 @@ func TestRemoveSeriesAfterRollbackAndTruncate(t *testing.T) { // Truncate again, this time the series should be deleted testutil.Ok(t, h.Truncate(2050)) - testutil.Equals(t, (*memSeries)(nil), h.series.getByHash(lset.Hash(), lset)) + testutil.Equals(t, (*record.MemSeries)(nil), h.series.getByHash(lset.Hash(), lset)) } func TestHead_LogRollback(t *testing.T) { @@ -1080,7 +1081,7 @@ func TestWalRepair_DecodingError(t *testing.T) { res[0] = byte(RecordInvalid) return res }, - enc.Series([]RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), + enc.Series([]record.RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), 9, 5, }, @@ -1088,7 +1089,7 @@ func TestWalRepair_DecodingError(t *testing.T) { func(rec []byte) []byte { return rec[:3] }, - enc.Series([]RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), + enc.Series([]record.RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), 9, 5, }, @@ -1096,7 +1097,7 @@ func TestWalRepair_DecodingError(t *testing.T) { func(rec []byte) []byte { return rec[:3] }, - enc.Samples([]RefSample{{Ref: 0, T: 99, V: 1}}, []byte{}), + enc.Samples([]record.RefSample{{Ref: 0, T: 99, V: 1}}, []byte{}), 9, 5, }, @@ -1104,7 +1105,7 @@ func TestWalRepair_DecodingError(t *testing.T) { func(rec []byte) []byte { return rec[:3] }, - enc.Tombstones([]Stone{{ref: 1, intervals: Intervals{}}}, []byte{}), + enc.Tombstones([]record.Stone{{Ref: 1, Intervals: record.Intervals{}}}, []byte{}), 9, 5, }, diff --git a/mocks_test.go b/mocks_test.go index 
35f5ffec..f48ea9aa 100644 --- a/mocks_test.go +++ b/mocks_test.go @@ -18,6 +18,7 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" ) type mockIndexWriter struct { diff --git a/querier.go b/querier.go index fbd9493f..2b655106 100644 --- a/querier.go +++ b/querier.go @@ -25,6 +25,7 @@ import ( tsdb_errors "github.com/prometheus/tsdb/errors" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" ) // Querier provides querying access over time series data of a fixed @@ -204,7 +205,7 @@ func NewBlockQuerier(b BlockReader, mint, maxt int64) (Querier, error) { type blockQuerier struct { index IndexReader chunks ChunkReader - tombstones TombstoneReader + tombstones record.TombstoneReader closed bool @@ -670,7 +671,7 @@ func (s *mergedVerticalSeriesSet) Next() bool { // actual series itself. type ChunkSeriesSet interface { Next() bool - At() (labels.Labels, []chunks.Meta, Intervals) + At() (labels.Labels, []chunks.Meta, record.Intervals) Err() error } @@ -679,19 +680,19 @@ type ChunkSeriesSet interface { type baseChunkSeries struct { p index.Postings index IndexReader - tombstones TombstoneReader + tombstones record.TombstoneReader lset labels.Labels chks []chunks.Meta - intervals Intervals + intervals record.Intervals err error } // LookupChunkSeries retrieves all series for the given matchers and returns a ChunkSeriesSet // over them. It drops chunks based on tombstones in the given reader. -func LookupChunkSeries(ir IndexReader, tr TombstoneReader, ms ...labels.Matcher) (ChunkSeriesSet, error) { +func LookupChunkSeries(ir IndexReader, tr record.TombstoneReader, ms ...labels.Matcher) (ChunkSeriesSet, error) { if tr == nil { - tr = newMemTombstones() + tr = record.NewMemTombstones() } p, err := PostingsForMatchers(ir, ms...) 
if err != nil { @@ -704,7 +705,7 @@ func LookupChunkSeries(ir IndexReader, tr TombstoneReader, ms ...labels.Matcher) }, nil } -func (s *baseChunkSeries) At() (labels.Labels, []chunks.Meta, Intervals) { +func (s *baseChunkSeries) At() (labels.Labels, []chunks.Meta, record.Intervals) { return s.lset, s.chks, s.intervals } @@ -721,7 +722,7 @@ func (s *baseChunkSeries) Next() bool { ref := s.p.At() if err := s.index.Series(ref, &lset, &chkMetas); err != nil { // Postings may be stale. Skip if no underlying series exists. - if errors.Cause(err) == ErrNotFound { + if errors.Cause(err) == record.ErrNotFound { continue } s.err = err @@ -740,7 +741,7 @@ func (s *baseChunkSeries) Next() bool { // Only those chunks that are not entirely deleted. chks := make([]chunks.Meta, 0, len(s.chks)) for _, chk := range s.chks { - if !(Interval{chk.MinTime, chk.MaxTime}.isSubrange(s.intervals)) { + if !(record.Interval{chk.MinTime, chk.MaxTime}.IsSubrange(s.intervals)) { chks = append(chks, chk) } } @@ -767,10 +768,10 @@ type populatedChunkSeries struct { err error chks []chunks.Meta lset labels.Labels - intervals Intervals + intervals record.Intervals } -func (s *populatedChunkSeries) At() (labels.Labels, []chunks.Meta, Intervals) { +func (s *populatedChunkSeries) At() (labels.Labels, []chunks.Meta, record.Intervals) { return s.lset, s.chks, s.intervals } @@ -801,7 +802,7 @@ func (s *populatedChunkSeries) Next() bool { c.Chunk, s.err = s.chunks.Chunk(c.Ref) if s.err != nil { // This means that the chunk has be garbage collected. Remove it from the list. - if s.err == ErrNotFound { + if s.err == record.ErrNotFound { s.err = nil // Delete in-place. s.chks = append(chks[:j], chks[j+1:]...) 
@@ -865,7 +866,7 @@ type chunkSeries struct { mint, maxt int64 - intervals Intervals + intervals record.Intervals } func (s *chunkSeries) Labels() labels.Labels { @@ -1066,7 +1067,7 @@ type chunkSeriesIterator struct { maxt, mint int64 - intervals Intervals + intervals record.Intervals } func newChunkSeriesIterator(cs []chunks.Meta, dranges Intervals, mint, maxt int64) *chunkSeriesIterator { @@ -1168,7 +1169,7 @@ func (it *chunkSeriesIterator) Err() error { type deletedIterator struct { it chunkenc.Iterator - intervals Intervals + intervals record.Intervals } func (it *deletedIterator) At() (int64, float64) { @@ -1181,7 +1182,7 @@ Outer: ts, _ := it.it.At() for _, tr := range it.intervals { - if tr.inBounds(ts) { + if tr.InBounds(ts) { continue Outer } diff --git a/querier_test.go b/querier_test.go index 2be48fcd..a1bdf395 100644 --- a/querier_test.go +++ b/querier_test.go @@ -29,6 +29,7 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" "github.com/prometheus/tsdb/tsdbutil" ) @@ -188,6 +189,19 @@ func expandSeriesIterator(it SeriesIterator) (r []tsdbutil.Sample, err error) { return r, it.Err() } +type sample struct { + t int64 + v float64 +} + +func (s sample) T() int64 { + return s.t +} + +func (s sample) V() float64 { + return s.v +} + type seriesSamples struct { lset map[string]string chunks [][]sample @@ -368,7 +382,7 @@ Outer: querier := &blockQuerier{ index: ir, chunks: cr, - tombstones: newMemTombstones(), + tombstones: record.NewMemTombstones(), mint: c.mint, maxt: c.maxt, @@ -415,7 +429,7 @@ func TestBlockQuerierDelete(t *testing.T) { cases := struct { data []seriesSamples - tombstones TombstoneReader + tombstones record.TombstoneReader queries []query }{ data: []seriesSamples{ @@ -460,10 +474,10 @@ func TestBlockQuerierDelete(t *testing.T) { }, }, }, - tombstones: &memTombstones{intvlGroups: map[uint64]Intervals{ 
- 1: Intervals{{1, 3}}, - 2: Intervals{{1, 3}, {6, 10}}, - 3: Intervals{{6, 10}}, + tombstones: &record.MemTombstones{IntvlGroups: map[uint64]record.Intervals{ + 1: record.Intervals{{1, 3}}, + 2: record.Intervals{{1, 3}, {6, 10}}, + 3: record.Intervals{{6, 10}}, }}, queries: []query{ { @@ -637,7 +651,7 @@ func TestBaseChunkSeries(t *testing.T) { bcs := &baseChunkSeries{ p: index.NewListPostings(tc.postings), index: mi, - tombstones: newMemTombstones(), + tombstones: record.NewMemTombstones(), } i := 0 @@ -1159,7 +1173,7 @@ func (m *mockChunkSeriesSet) Next() bool { return m.i < len(m.l) } -func (m *mockChunkSeriesSet) At() (labels.Labels, []chunks.Meta, Intervals) { +func (m *mockChunkSeriesSet) At() (labels.Labels, []chunks.Meta, record.Intervals) { return m.l[m.i], m.cm[m.i], nil } @@ -1254,18 +1268,18 @@ func TestDeletedIterator(t *testing.T) { } cases := []struct { - r Intervals + r record.Intervals }{ - {r: Intervals{{1, 20}}}, - {r: Intervals{{1, 10}, {12, 20}, {21, 23}, {25, 30}}}, - {r: Intervals{{1, 10}, {12, 20}, {20, 30}}}, - {r: Intervals{{1, 10}, {12, 23}, {25, 30}}}, - {r: Intervals{{1, 23}, {12, 20}, {25, 30}}}, - {r: Intervals{{1, 23}, {12, 20}, {25, 3000}}}, - {r: Intervals{{0, 2000}}}, - {r: Intervals{{500, 2000}}}, - {r: Intervals{{0, 200}}}, - {r: Intervals{{1000, 20000}}}, + {r: record.Intervals{{1, 20}}}, + {r: record.Intervals{{1, 10}, {12, 20}, {21, 23}, {25, 30}}}, + {r: record.Intervals{{1, 10}, {12, 20}, {20, 30}}}, + {r: record.Intervals{{1, 10}, {12, 23}, {25, 30}}}, + {r: record.Intervals{{1, 23}, {12, 20}, {25, 30}}}, + {r: record.Intervals{{1, 23}, {12, 20}, {25, 3000}}}, + {r: record.Intervals{{0, 2000}}}, + {r: record.Intervals{{500, 2000}}}, + {r: record.Intervals{{0, 200}}}, + {r: record.Intervals{{1000, 20000}}}, } for _, c := range cases { @@ -1275,7 +1289,7 @@ func TestDeletedIterator(t *testing.T) { for it.Next() { i++ for _, tr := range ranges { - if tr.inBounds(i) { + if tr.InBounds(i) { i = tr.Maxt + 1 ranges = ranges[1:] 
} @@ -1290,7 +1304,7 @@ func TestDeletedIterator(t *testing.T) { // There has been an extra call to Next(). i++ for _, tr := range ranges { - if tr.inBounds(i) { + if tr.InBounds(i) { i = tr.Maxt + 1 ranges = ranges[1:] } @@ -1403,7 +1417,7 @@ func (m mockIndex) SortedPostings(p index.Postings) index.Postings { func (m mockIndex) Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta) error { s, ok := m.series[ref] if !ok { - return ErrNotFound + return record.ErrNotFound } *lset = append((*lset)[:0], s.l...) *chks = append((*chks)[:0], s.chunks...) diff --git a/record/internal.go b/record/internal.go new file mode 100644 index 00000000..840023c6 --- /dev/null +++ b/record/internal.go @@ -0,0 +1,371 @@ +// Copyright 2017 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package record + +import ( + "errors" + "hash" + "hash/crc32" + "math" + "os" + "path/filepath" + "sync" + + "github.com/prometheus/tsdb/chunkenc" + "github.com/prometheus/tsdb/chunks" + "github.com/prometheus/tsdb/fileutil" + "github.com/prometheus/tsdb/labels" +) + +var ( + // ErrOutOfOrderSample is returned if an appended sample has a + // timestamp smaller than the most recent sample. + ErrOutOfOrderSample = errors.New("out of order sample") + + // ErrNotFound is returned if a looked up resource was not found. 
+ ErrNotFound = errors.New("not found") + + // ErrAmendSample is returned if an appended sample has the same timestamp + // as the most recent sample but a different value. + ErrAmendSample = errors.New("amending sample") +) + +// The table gets initialized with sync.Once but may still cause a race +// with any other use of the crc32 package anywhere. Thus we initialize it +// before. +var castagnoliTable *crc32.Table + +func init() { + castagnoliTable = crc32.MakeTable(crc32.Castagnoli) +} + +// NewCRC32 initializes a CRC32 hash with a preconfigured polynomial, so the +// polynomial may be easily changed in one location at a later time, if necessary. +func NewCRC32() hash.Hash32 { + return crc32.New(castagnoliTable) +} + +type sample struct { + t int64 + v float64 +} + +func (s sample) T() int64 { + return s.t +} + +func (s sample) V() float64 { + return s.v +} + +// SizeReader returns the size of the object in bytes. +type SizeReader interface { + // Size returns the size in bytes. + Size() int64 +} + +// RefSeries is the series labels with the series ID. +type RefSeries struct { + Ref uint64 + Labels labels.Labels +} + +// RefSample is a timestamp/value pair associated with a reference to a series. +type RefSample struct { + Ref uint64 + T int64 + V float64 + Series *MemSeries +} + +// MemSeries is the in-memory representation of a series. None of its methods +// are goroutine safe and it is the caller's responsibility to lock it. +type MemSeries struct { + sync.Mutex + + Ref uint64 + PendingCommit bool // Whether there are samples waiting to be committed to this series. + Chunks []*MemChunk + Lset labels.Labels + + headChunk *MemChunk + chunkRange int64 + firstChunkID int + + nextAt int64 // Timestamp at which to cut the next chunk. + sampleBuf [4]sample + + app chunkenc.Appender // Current appender for the chunk. 
+} + +func NewMemSeries(lset labels.Labels, id uint64, chunkRange int64) *MemSeries { + s := &MemSeries{ + Lset: lset, + Ref: id, + chunkRange: chunkRange, + nextAt: math.MinInt64, + } + return s +} + +func (s *MemSeries) MinTime() int64 { + if len(s.Chunks) == 0 { + return math.MinInt64 + } + return s.Chunks[0].MinTime +} + +func (s *MemSeries) MaxTime() int64 { + c := s.head() + if c == nil { + return math.MinInt64 + } + return c.MaxTime +} + +func (s *MemSeries) cut(mint int64) *MemChunk { + c := &MemChunk{ + Chunk: chunkenc.NewXORChunk(), + MinTime: mint, + MaxTime: math.MinInt64, + } + s.Chunks = append(s.Chunks, c) + s.headChunk = c + + // Set upper bound on when the next chunk must be started. An earlier timestamp + // may be chosen dynamically at a later point. + s.nextAt = rangeForTimestamp(mint, s.chunkRange) + + app, err := c.Chunk.Appender() + if err != nil { + panic(err) + } + s.app = app + return c +} + +func (s *MemSeries) ChunksMetas() []chunks.Meta { + metas := make([]chunks.Meta, 0, len(s.Chunks)) + for _, chk := range s.Chunks { + metas = append(metas, chunks.Meta{Chunk: chk.Chunk, MinTime: chk.MinTime, MaxTime: chk.MaxTime}) + } + return metas +} + +// reset re-initialises all the variable in the MemSeries except 'lset', 'ref', +// and 'chunkRange', like how it would appear after 'newMemSeries(...)'. +func (s *MemSeries) Reset() { + s.Chunks = nil + s.headChunk = nil + s.firstChunkID = 0 + s.nextAt = math.MinInt64 + s.sampleBuf = [4]sample{} + s.PendingCommit = false + s.app = nil +} + +// Appendable checks whether the given sample is valid for appending to the series. +func (s *MemSeries) Appendable(t int64, v float64) error { + c := s.head() + if c == nil { + return nil + } + + if t > c.MaxTime { + return nil + } + if t < c.MaxTime { + return ErrOutOfOrderSample + } + // We are allowing exact duplicates as we can encounter them in valid cases + // like federation and erroring out at that time would be extremely noisy. 
+ if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) { + return ErrAmendSample + } + return nil +} + +func (s *MemSeries) Chunk(id int) *MemChunk { + ix := id - s.firstChunkID + if ix < 0 || ix >= len(s.Chunks) { + return nil + } + return s.Chunks[ix] +} + +func (s *MemSeries) ChunkID(pos int) int { + return pos + s.firstChunkID +} + +// TruncateChunksBefore removes all chunks from the series that have not timestamp +// at or after mint. Chunk IDs remain unchanged. +func (s *MemSeries) TruncateChunksBefore(mint int64) (removed int) { + var k int + for i, c := range s.Chunks { + if c.MaxTime >= mint { + break + } + k = i + 1 + } + s.Chunks = append(s.Chunks[:0], s.Chunks[k:]...) + s.firstChunkID += k + if len(s.Chunks) == 0 { + s.headChunk = nil + } else { + s.headChunk = s.Chunks[len(s.Chunks)-1] + } + + return k +} + +// Append adds the sample (t, v) to the series. +func (s *MemSeries) Append(t int64, v float64) (success, chunkCreated bool) { + // Based on Gorilla white papers this offers near-optimal compression ratio + // so anything bigger that this has diminishing returns and increases + // the time range within which we have to decompress all samples. + const samplesPerChunk = 120 + + c := s.head() + + if c == nil { + c = s.cut(t) + chunkCreated = true + } + numSamples := c.Chunk.NumSamples() + + // Out of order sample. + if c.MaxTime >= t { + return false, chunkCreated + } + // If we reach 25% of a chunk's desired sample count, set a definitive time + // at which to start the next chunk. + // At latest it must happen at the timestamp set when the chunk was cut. 
+ if numSamples == samplesPerChunk/4 { + s.nextAt = computeChunkEndTime(c.MinTime, c.MaxTime, s.nextAt) + } + if t >= s.nextAt { + c = s.cut(t) + chunkCreated = true + } + s.app.Append(t, v) + + c.MaxTime = t + + s.sampleBuf[0] = s.sampleBuf[1] + s.sampleBuf[1] = s.sampleBuf[2] + s.sampleBuf[2] = s.sampleBuf[3] + s.sampleBuf[3] = sample{t: t, v: v} + + return true, chunkCreated +} + +func (s *MemSeries) Iterator(id int) chunkenc.Iterator { + c := s.Chunk(id) + // TODO(fabxc): Work around! A querier may have retrieved a pointer to a series' chunk, + // which got then garbage collected before it got accessed. + // We must ensure to not garbage collect as long as any readers still hold a reference. + if c == nil { + return chunkenc.NewNopIterator() + } + + if id-s.firstChunkID < len(s.Chunks)-1 { + return c.Chunk.Iterator() + } + // Serve the last 4 samples for the last chunk from the sample buffer + // as their compressed bytes may be mutated by added samples. + it := &MemSafeIterator{ + Iterator: c.Chunk.Iterator(), + i: -1, + total: c.Chunk.NumSamples(), + buf: s.sampleBuf, + } + return it +} + +func (s *MemSeries) head() *MemChunk { + return s.headChunk +} + +type MemChunk struct { + Chunk chunkenc.Chunk + MinTime, MaxTime int64 +} + +// Returns true if the chunk overlaps [mint, maxt]. 
+func (mc *MemChunk) OverlapsClosedInterval(mint, maxt int64) bool { + return mc.MinTime <= maxt && mint <= mc.MaxTime +} + +type MemSafeIterator struct { + chunkenc.Iterator + + i int + total int + buf [4]sample +} + +func (it *MemSafeIterator) Next() bool { + if it.i+1 >= it.total { + return false + } + it.i++ + if it.total-it.i > 4 { + return it.Iterator.Next() + } + return true +} + +func (it *MemSafeIterator) At() (int64, float64) { + if it.total-it.i > 4 { + return it.Iterator.At() + } + s := it.buf[4-(it.total-it.i)] + return s.t, s.v +} + +func rangeForTimestamp(t int64, width int64) (maxt int64) { + return (t/width)*width + width +} + +// computeChunkEndTime estimates the end timestamp based the beginning of a chunk, +// its current timestamp and the upper bound up to which we insert data. +// It assumes that the time range is 1/4 full. +func computeChunkEndTime(start, cur, max int64) int64 { + a := (max - start) / ((cur - start + 1) * 4) + if a == 0 { + return max + } + return start + (max-start)/a +} + +// RenameFile renames the file from, removing to if it already exists before doing the rename. +func RenameFile(from, to string) error { + if err := os.RemoveAll(to); err != nil { + return err + } + if err := os.Rename(from, to); err != nil { + return err + } + + // Directory was renamed; sync parent dir to persist rename. + pdir, err := fileutil.OpenDir(filepath.Dir(to)) + if err != nil { + return err + } + + if err = pdir.Sync(); err != nil { + pdir.Close() + return err + } + return pdir.Close() +} diff --git a/record.go b/record/record.go similarity index 97% rename from record.go rename to record/record.go index 8d9c5751..887f9275 100644 --- a/record.go +++ b/record/record.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package tsdb +package record import ( "math" @@ -131,8 +131,8 @@ func (d *RecordDecoder) Tombstones(rec []byte, tstones []Stone) ([]Stone, error) } for dec.Len() > 0 && dec.Err() == nil { tstones = append(tstones, Stone{ - ref: dec.Be64(), - intervals: Intervals{ + Ref: dec.Be64(), + Intervals: Intervals{ {Mint: dec.Varint64(), Maxt: dec.Varint64()}, }, }) @@ -198,8 +198,8 @@ func (e *RecordEncoder) Tombstones(tstones []Stone, b []byte) []byte { buf.PutByte(byte(RecordTombstones)) for _, s := range tstones { - for _, iv := range s.intervals { - buf.PutBE64(s.ref) + for _, iv := range s.Intervals { + buf.PutBE64(s.Ref) buf.PutVarint64(iv.Mint) buf.PutVarint64(iv.Maxt) } diff --git a/record_test.go b/record/record_test.go similarity index 89% rename from record_test.go rename to record/record_test.go index 8316ccf3..fdc69514 100644 --- a/record_test.go +++ b/record/record_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package tsdb +package record import ( "testing" @@ -55,11 +55,11 @@ func TestRecord_EncodeDecode(t *testing.T) { // Intervals get split up into single entries. So we don't get back exactly // what we put in. 
tstones := []Stone{ - {ref: 123, intervals: Intervals{ + {Ref: 123, Intervals: Intervals{ {Mint: -1000, Maxt: 1231231}, {Mint: 5000, Maxt: 0}, }}, - {ref: 13, intervals: Intervals{ + {Ref: 13, Intervals: Intervals{ {Mint: -1000, Maxt: -11}, {Mint: 5000, Maxt: 1000}, }}, @@ -67,10 +67,10 @@ func TestRecord_EncodeDecode(t *testing.T) { decTstones, err := dec.Tombstones(enc.Tombstones(tstones, nil), nil) testutil.Ok(t, err) testutil.Equals(t, []Stone{ - {ref: 123, intervals: Intervals{{Mint: -1000, Maxt: 1231231}}}, - {ref: 123, intervals: Intervals{{Mint: 5000, Maxt: 0}}}, - {ref: 13, intervals: Intervals{{Mint: -1000, Maxt: -11}}}, - {ref: 13, intervals: Intervals{{Mint: 5000, Maxt: 1000}}}, + {Ref: 123, Intervals: Intervals{{Mint: -1000, Maxt: 1231231}}}, + {Ref: 123, Intervals: Intervals{{Mint: 5000, Maxt: 0}}}, + {Ref: 13, Intervals: Intervals{{Mint: -1000, Maxt: -11}}}, + {Ref: 13, Intervals: Intervals{{Mint: 5000, Maxt: 1000}}}, }, decTstones) } @@ -105,7 +105,7 @@ func TestRecord_Corruputed(t *testing.T) { t.Run("Test corrupted tombstone record", func(t *testing.T) { tstones := []Stone{ - {ref: 123, intervals: Intervals{ + {Ref: 123, Intervals: Intervals{ {Mint: -1000, Maxt: 1231231}, {Mint: 5000, Maxt: 0}, }}, diff --git a/tombstones.go b/record/tombstones.go similarity index 80% rename from tombstones.go rename to record/tombstones.go index d7b76230..23f62ee7 100644 --- a/tombstones.go +++ b/record/tombstones.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package tsdb +package record import ( "encoding/binary" @@ -30,7 +30,7 @@ import ( "github.com/prometheus/tsdb/fileutil" ) -const tombstoneFilename = "tombstones" +const TombstoneFilename = "tombstones" const ( // MagicTombstone is 4 bytes at the head of a tombstone file. 
@@ -54,7 +54,7 @@ type TombstoneReader interface { Close() error } -func writeTombstoneFile(logger log.Logger, dir string, tr TombstoneReader) (int64, error) { +func WriteTombstoneFile(logger log.Logger, dir string, tr TombstoneReader) (int64, error) { path := filepath.Join(dir, tombstoneFilename) tmp := path + ".tmp" hash := newCRC32() @@ -129,11 +129,11 @@ func writeTombstoneFile(logger log.Logger, dir string, tr TombstoneReader) (int6 // Stone holds the information on the posting and time-range // that is deleted. type Stone struct { - ref uint64 - intervals Intervals + Ref uint64 + Intervals Intervals } -func readTombstones(dir string) (TombstoneReader, int64, error) { +func ReadTombstones(dir string) (TombstoneReader, int64, error) { b, err := ioutil.ReadFile(filepath.Join(dir, tombstoneFilename)) if os.IsNotExist(err) { return newMemTombstones(), 0, nil @@ -158,7 +158,7 @@ func readTombstones(dir string) (TombstoneReader, int64, error) { } // Verify checksum. - hash := newCRC32() + hash := NewCRC32() if _, err := hash.Write(d.Get()); err != nil { return nil, 0, errors.Wrap(err, "write to hash") } @@ -166,7 +166,7 @@ func readTombstones(dir string) (TombstoneReader, int64, error) { return nil, 0, errors.New("checksum did not match") } - stonesMap := newMemTombstones() + stonesMap := NewMemTombstones() for d.Len() > 0 { k := d.Uvarint64() @@ -176,33 +176,33 @@ func readTombstones(dir string) (TombstoneReader, int64, error) { return nil, 0, d.Err() } - stonesMap.addInterval(k, Interval{mint, maxt}) + stonesMap.AddInterval(k, Interval{mint, maxt}) } return stonesMap, int64(len(b)), nil } -type memTombstones struct { - intvlGroups map[uint64]Intervals +type MemTombstones struct { + IntvlGroups map[uint64]Intervals mtx sync.RWMutex } -// newMemTombstones creates new in memory TombstoneReader +// NewMemTombstones creates new in memory TombstoneReader // that allows adding new intervals. 
-func newMemTombstones() *memTombstones { - return &memTombstones{intvlGroups: make(map[uint64]Intervals)} +func NewMemTombstones() *MemTombstones { + return &MemTombstones{IntvlGroups: make(map[uint64]Intervals)} } -func (t *memTombstones) Get(ref uint64) (Intervals, error) { +func (t *MemTombstones) Get(ref uint64) (Intervals, error) { t.mtx.RLock() defer t.mtx.RUnlock() - return t.intvlGroups[ref], nil + return t.IntvlGroups[ref], nil } -func (t *memTombstones) Iter(f func(uint64, Intervals) error) error { +func (t *MemTombstones) Iter(f func(uint64, Intervals) error) error { t.mtx.RLock() defer t.mtx.RUnlock() - for ref, ivs := range t.intvlGroups { + for ref, ivs := range t.IntvlGroups { if err := f(ref, ivs); err != nil { return err } @@ -210,23 +210,23 @@ func (t *memTombstones) Iter(f func(uint64, Intervals) error) error { return nil } -func (t *memTombstones) Total() uint64 { +func (t *MemTombstones) Total() uint64 { t.mtx.RLock() defer t.mtx.RUnlock() total := uint64(0) - for _, ivs := range t.intvlGroups { + for _, ivs := range t.IntvlGroups { total += uint64(len(ivs)) } return total } -// addInterval to an existing memTombstones -func (t *memTombstones) addInterval(ref uint64, itvs ...Interval) { +// AddInterval to an existing MemTombstones +func (t *MemTombstones) AddInterval(ref uint64, itvs ...Interval) { t.mtx.Lock() defer t.mtx.Unlock() for _, itv := range itvs { - t.intvlGroups[ref] = t.intvlGroups[ref].add(itv) + t.IntvlGroups[ref] = t.IntvlGroups[ref].Add(itv) } } @@ -239,13 +239,13 @@ type Interval struct { Mint, Maxt int64 } -func (tr Interval) inBounds(t int64) bool { +func (tr Interval) InBounds(t int64) bool { return t >= tr.Mint && t <= tr.Maxt } -func (tr Interval) isSubrange(dranges Intervals) bool { +func (tr Interval) IsSubrange(dranges Intervals) bool { for _, r := range dranges { - if r.inBounds(tr.Mint) && r.inBounds(tr.Maxt) { + if r.InBounds(tr.Mint) && r.InBounds(tr.Maxt) { return true } } @@ -256,12 +256,12 @@ func (tr Interval) 
isSubrange(dranges Intervals) bool { // Intervals represents a set of increasing and non-overlapping time-intervals. type Intervals []Interval -// add the new time-range to the existing ones. +// Add the new time-range to the existing ones. // The existing ones must be sorted. -func (itvs Intervals) add(n Interval) Intervals { +func (itvs Intervals) Add(n Interval) Intervals { for i, r := range itvs { // TODO(gouthamve): Make this codepath easier to digest. - if r.inBounds(n.Mint-1) || r.inBounds(n.Mint) { + if r.InBounds(n.Mint-1) || r.InBounds(n.Mint) { if n.Maxt > r.Maxt { itvs[i].Maxt = n.Maxt } @@ -282,7 +282,7 @@ func (itvs Intervals) add(n Interval) Intervals { return itvs } - if r.inBounds(n.Maxt+1) || r.inBounds(n.Maxt) { + if r.InBounds(n.Maxt+1) || r.InBounds(n.Maxt) { if n.Mint < r.Maxt { itvs[i].Mint = n.Mint } diff --git a/tombstones_test.go b/record/tombstones_test.go similarity index 89% rename from tombstones_test.go rename to record/tombstones_test.go index 33ebb3bc..1d8bb8e6 100644 --- a/tombstones_test.go +++ b/record/tombstones_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package tsdb +package record import ( "io/ioutil" @@ -33,7 +33,7 @@ func TestWriteAndReadbackTombStones(t *testing.T) { ref := uint64(0) - stones := newMemTombstones() + stones := NewMemTombstones() // Generate the tombstones. for i := 0; i < 100; i++ { ref += uint64(rand.Int31n(10)) + 1 @@ -41,16 +41,16 @@ func TestWriteAndReadbackTombStones(t *testing.T) { dranges := make(Intervals, 0, numRanges) mint := rand.Int63n(time.Now().UnixNano()) for j := 0; j < numRanges; j++ { - dranges = dranges.add(Interval{mint, mint + rand.Int63n(1000)}) + dranges = dranges.Add(Interval{mint, mint + rand.Int63n(1000)}) mint += rand.Int63n(1000) + 1 } - stones.addInterval(ref, dranges...) + stones.AddInterval(ref, dranges...) 
} - _, err := writeTombstoneFile(log.NewNopLogger(), tmpdir, stones) + _, err := WriteTombstoneFile(log.NewNopLogger(), tmpdir, stones) testutil.Ok(t, err) - restr, _, err := readTombstones(tmpdir) + restr, _, err := ReadTombstones(tmpdir) testutil.Ok(t, err) // Compare the two readers. @@ -122,20 +122,20 @@ func TestAddingNewIntervals(t *testing.T) { for _, c := range cases { - testutil.Equals(t, c.exp, c.exist.add(c.new)) + testutil.Equals(t, c.exp, c.exist.Add(c.new)) } } // TestMemTombstonesConcurrency to make sure they are safe to access from different goroutines. func TestMemTombstonesConcurrency(t *testing.T) { - tomb := newMemTombstones() + tomb := NewMemTombstones() totalRuns := 100 var wg sync.WaitGroup wg.Add(2) go func() { for x := 0; x < totalRuns; x++ { - tomb.addInterval(uint64(x), Interval{int64(x), int64(x)}) + tomb.AddInterval(uint64(x), Interval{int64(x), int64(x)}) } wg.Done() }() diff --git a/wal.go b/wal.go index 49f55fe4..27e4c0ba 100644 --- a/wal.go +++ b/wal.go @@ -34,6 +34,7 @@ import ( "github.com/prometheus/tsdb/encoding" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/wal" ) @@ -89,9 +90,9 @@ func newWalMetrics(wal *SegmentWAL, r prometheus.Registerer) *walMetrics { // DEPRECATED: use wal pkg combined with the record codex instead. type WAL interface { Reader() WALReader - LogSeries([]RefSeries) error - LogSamples([]RefSample) error - LogDeletes([]Stone) error + LogSeries([]record.RefSeries) error + LogSamples([]record.RefSample) error + LogDeletes([]record.Stone) error Truncate(mint int64, keep func(uint64) bool) error Close() error } @@ -99,27 +100,12 @@ type WAL interface { // WALReader reads entries from a WAL. 
type WALReader interface { Read( - seriesf func([]RefSeries), - samplesf func([]RefSample), - deletesf func([]Stone), + seriesf func([]record.RefSeries), + samplesf func([]record.RefSample), + deletesf func([]record.Stone), ) error } -// RefSeries is the series labels with the series ID. -type RefSeries struct { - Ref uint64 - Labels labels.Labels -} - -// RefSample is a timestamp/value pair associated with a reference to a series. -type RefSample struct { - Ref uint64 - T int64 - V float64 - - series *memSeries -} - // segmentFile wraps a file object of a segment and tracks the highest timestamp // it contains. During WAL truncating, all segments with no higher timestamp than // the truncation threshold can be compacted. @@ -240,9 +226,9 @@ type repairingWALReader struct { } func (r *repairingWALReader) Read( - seriesf func([]RefSeries), - samplesf func([]RefSample), - deletesf func([]Stone), + seriesf func([]record.RefSeries), + samplesf func([]record.RefSample), + deletesf func([]record.Stone), ) error { err := r.r.Read(seriesf, samplesf, deletesf) if err == nil { @@ -348,8 +334,8 @@ func (w *SegmentWAL) Truncate(mint int64, keep func(uint64) bool) error { var ( csf = newSegmentFile(f) crc32 = newCRC32() - decSeries = []RefSeries{} - activeSeries = []RefSeries{} + decSeries = []record.RefSeries{} + activeSeries = []record.RefSeries{} ) for r.next() { @@ -427,7 +413,7 @@ func (w *SegmentWAL) Truncate(mint int64, keep func(uint64) bool) error { // LogSeries writes a batch of new series labels to the log. // The series have to be ordered. -func (w *SegmentWAL) LogSeries(series []RefSeries) error { +func (w *SegmentWAL) LogSeries(series []record.RefSeries) error { buf := w.getBuffer() flag := w.encodeSeries(buf, series) @@ -454,7 +440,7 @@ func (w *SegmentWAL) LogSeries(series []RefSeries) error { } // LogSamples writes a batch of new samples to the log. 
-func (w *SegmentWAL) LogSamples(samples []RefSample) error { +func (w *SegmentWAL) LogSamples(samples []record.RefSample) error { buf := w.getBuffer() flag := w.encodeSamples(buf, samples) @@ -480,7 +466,7 @@ func (w *SegmentWAL) LogSamples(samples []RefSample) error { } // LogDeletes write a batch of new deletes to the log. -func (w *SegmentWAL) LogDeletes(stones []Stone) error { +func (w *SegmentWAL) LogDeletes(stones []record.Stone) error { buf := w.getBuffer() flag := w.encodeDeletes(buf, stones) @@ -498,7 +484,7 @@ func (w *SegmentWAL) LogDeletes(stones []Stone) error { tf := w.head() for _, s := range stones { - for _, iv := range s.intervals { + for _, iv := range s.Intervals { if tf.maxTime < iv.Maxt { tf.maxTime = iv.Maxt } @@ -791,7 +777,7 @@ const ( walDeletesSimple = 1 ) -func (w *SegmentWAL) encodeSeries(buf *encoding.Encbuf, series []RefSeries) uint8 { +func (w *SegmentWAL) encodeSeries(buf *encoding.Encbuf, series []record.RefSeries) uint8 { for _, s := range series { buf.PutBE64(s.Ref) buf.PutUvarint(len(s.Labels)) @@ -804,7 +790,7 @@ func (w *SegmentWAL) encodeSeries(buf *encoding.Encbuf, series []RefSeries) uint return walSeriesSimple } -func (w *SegmentWAL) encodeSamples(buf *encoding.Encbuf, samples []RefSample) uint8 { +func (w *SegmentWAL) encodeSamples(buf *encoding.Encbuf, samples []record.RefSample) uint8 { if len(samples) == 0 { return walSamplesSimple } @@ -825,10 +811,10 @@ func (w *SegmentWAL) encodeSamples(buf *encoding.Encbuf, samples []RefSample) ui return walSamplesSimple } -func (w *SegmentWAL) encodeDeletes(buf *encoding.Encbuf, stones []Stone) uint8 { +func (w *SegmentWAL) encodeDeletes(buf *encoding.Encbuf, stones []record.Stone) uint8 { for _, s := range stones { - for _, iv := range s.intervals { - buf.PutBE64(s.ref) + for _, iv := range s.Intervals { + buf.PutBE64(s.Ref) buf.PutVarint64(iv.Mint) buf.PutVarint64(iv.Maxt) } @@ -871,9 +857,9 @@ func (r *walReader) Err() error { } func (r *walReader) Read( - seriesf 
func([]RefSeries), - samplesf func([]RefSample), - deletesf func([]Stone), + seriesf func([]record.RefSeries), + samplesf func([]record.RefSample), + deletesf func([]record.Stone), ) error { // Concurrency for replaying the WAL is very limited. We at least split out decoding and // processing into separate threads. @@ -892,19 +878,19 @@ func (r *walReader) Read( for x := range datac { switch v := x.(type) { - case []RefSeries: + case []record.RefSeries: if seriesf != nil { seriesf(v) } //lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty. seriesPool.Put(v[:0]) - case []RefSample: + case []record.RefSample: if samplesf != nil { samplesf(v) } //lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty. samplePool.Put(v[:0]) - case []Stone: + case []record.Stone: if deletesf != nil { deletesf(v) } @@ -925,11 +911,11 @@ func (r *walReader) Read( // Those should generally be catched by entry decoding before. switch et { case WALEntrySeries: - var series []RefSeries + var series []record.RefSeries if v := seriesPool.Get(); v == nil { - series = make([]RefSeries, 0, 512) + series = make([]record.RefSeries, 0, 512) } else { - series = v.([]RefSeries) + series = v.([]record.RefSeries) } err = r.decodeSeries(flag, b, &series) @@ -946,11 +932,11 @@ func (r *walReader) Read( } } case WALEntrySamples: - var samples []RefSample + var samples []record.RefSample if v := samplePool.Get(); v == nil { - samples = make([]RefSample, 0, 512) + samples = make([]record.RefSample, 0, 512) } else { - samples = v.([]RefSample) + samples = v.([]record.RefSample) } err = r.decodeSamples(flag, b, &samples) @@ -968,11 +954,11 @@ func (r *walReader) Read( } } case WALEntryDeletes: - var deletes []Stone + var deletes []record.Stone if v := deletePool.Get(); v == nil { - deletes = make([]Stone, 0, 512) + deletes = make([]record.Stone, 0, 512) } else { - deletes = v.([]Stone) + deletes = v.([]record.Stone) } err = r.decodeDeletes(flag, 
b, &deletes) @@ -985,7 +971,7 @@ func (r *walReader) Read( // Update the times for the WAL segment file. cf := r.current() for _, s := range deletes { - for _, iv := range s.intervals { + for _, iv := range s.Intervals { if cf.maxTime < iv.Maxt { cf.maxTime = iv.Maxt } @@ -1122,7 +1108,7 @@ func (r *walReader) entry(cr io.Reader) (WALEntryType, byte, []byte, error) { return etype, flag, buf, nil } -func (r *walReader) decodeSeries(flag byte, b []byte, res *[]RefSeries) error { +func (r *walReader) decodeSeries(flag byte, b []byte, res *[]record.RefSeries) error { dec := encoding.Decbuf{B: b} for len(dec.B) > 0 && dec.Err() == nil { @@ -1136,7 +1122,7 @@ func (r *walReader) decodeSeries(flag byte, b []byte, res *[]RefSeries) error { } sort.Sort(lset) - *res = append(*res, RefSeries{ + *res = append(*res, record.RefSeries{ Ref: ref, Labels: lset, }) @@ -1150,7 +1136,7 @@ func (r *walReader) decodeSeries(flag byte, b []byte, res *[]RefSeries) error { return nil } -func (r *walReader) decodeSamples(flag byte, b []byte, res *[]RefSample) error { +func (r *walReader) decodeSamples(flag byte, b []byte, res *[]record.RefSample) error { if len(b) == 0 { return nil } @@ -1166,7 +1152,7 @@ func (r *walReader) decodeSamples(flag byte, b []byte, res *[]RefSample) error { dtime := dec.Varint64() val := dec.Be64() - *res = append(*res, RefSample{ + *res = append(*res, record.RefSample{ Ref: uint64(int64(baseRef) + dref), T: baseTime + dtime, V: math.Float64frombits(val), @@ -1182,13 +1168,13 @@ func (r *walReader) decodeSamples(flag byte, b []byte, res *[]RefSample) error { return nil } -func (r *walReader) decodeDeletes(flag byte, b []byte, res *[]Stone) error { +func (r *walReader) decodeDeletes(flag byte, b []byte, res *[]record.Stone) error { dec := &encoding.Decbuf{B: b} for dec.Len() > 0 && dec.Err() == nil { - *res = append(*res, Stone{ - ref: dec.Be64(), - intervals: Intervals{ + *res = append(*res, record.Stone{ + Ref: dec.Be64(), + Intervals: record.Intervals{ {Mint: 
dec.Varint64(), Maxt: dec.Varint64()}, }, }) @@ -1268,23 +1254,23 @@ func MigrateWAL(logger log.Logger, dir string) (err error) { rdr := w.Reader() var ( - enc RecordEncoder + enc record.RecordEncoder b []byte ) decErr := rdr.Read( - func(s []RefSeries) { + func(s []record.RefSeries) { if err != nil { return } err = repl.Log(enc.Series(s, b[:0])) }, - func(s []RefSample) { + func(s []record.RefSample) { if err != nil { return } err = repl.Log(enc.Samples(s, b[:0])) }, - func(s []Stone) { + func(s []record.Stone) { if err != nil { return } diff --git a/checkpoint.go b/wal/checkpoint.go similarity index 88% rename from checkpoint.go rename to wal/checkpoint.go index eccfa62b..d9595ff3 100644 --- a/checkpoint.go +++ b/wal/checkpoint.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package tsdb +package wal import ( "fmt" @@ -27,7 +27,7 @@ import ( "github.com/pkg/errors" tsdb_errors "github.com/prometheus/tsdb/errors" "github.com/prometheus/tsdb/fileutil" - "github.com/prometheus/tsdb/wal" + "github.com/prometheus/tsdb/record" ) // CheckpointStats returns stats about a created checkpoint. @@ -63,7 +63,7 @@ func LastCheckpoint(dir string) (string, int, error) { } return filepath.Join(dir, fi.Name()), idx, nil } - return "", 0, ErrNotFound + return "", 0, record.ErrNotFound } // DeleteCheckpoints deletes all checkpoints in a directory below a given index. @@ -99,15 +99,15 @@ const checkpointPrefix = "checkpoint." // segmented format as the original WAL itself. // This makes it easy to read it through the WAL package and concatenate // it with the original WAL. 
-func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) (*CheckpointStats, error) { +func Checkpoint(w *WAL, from, to int, keep func(id uint64) bool, mint int64) (*CheckpointStats, error) { stats := &CheckpointStats{} var sgmReader io.ReadCloser { - var sgmRange []wal.SegmentRange + var sgmRange []SegmentRange dir, idx, err := LastCheckpoint(w.Dir()) - if err != nil && err != ErrNotFound { + if err != nil && err != record.ErrNotFound { return nil, errors.Wrap(err, "find last checkpoint") } last := idx + 1 @@ -118,11 +118,11 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) // Ignore WAL files below the checkpoint. They shouldn't exist to begin with. from = last - sgmRange = append(sgmRange, wal.SegmentRange{Dir: dir, Last: math.MaxInt32}) + sgmRange = append(sgmRange, SegmentRange{Dir: dir, Last: math.MaxInt32}) } - sgmRange = append(sgmRange, wal.SegmentRange{Dir: w.Dir(), First: from, Last: to}) - sgmReader, err = wal.NewSegmentsRangeReader(sgmRange...) + sgmRange = append(sgmRange, SegmentRange{Dir: w.Dir(), First: from, Last: to}) + sgmReader, err = NewSegmentsRangeReader(sgmRange...) 
if err != nil { return nil, errors.Wrap(err, "create segment reader") } @@ -135,7 +135,7 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) if err := os.MkdirAll(cpdirtmp, 0777); err != nil { return nil, errors.Wrap(err, "create checkpoint dir") } - cp, err := wal.New(nil, nil, cpdirtmp, w.CompressionEnabled()) + cp, err := New(nil, nil, cpdirtmp, w.CompressionEnabled()) if err != nil { return nil, errors.Wrap(err, "open checkpoint") } @@ -146,14 +146,14 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) os.RemoveAll(cpdirtmp) }() - r := wal.NewReader(sgmReader) + r := NewReader(sgmReader) var ( - series []RefSeries - samples []RefSample - tstones []Stone - dec RecordDecoder - enc RecordEncoder + series []record.RefSeries + samples []record.RefSample + tstones []record.Stone + dec record.RecordDecoder + enc record.RecordEncoder buf []byte recs [][]byte ) @@ -167,7 +167,7 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) rec := r.Record() switch dec.Type(rec) { - case RecordSeries: + case record.RecordSeries: series, err = dec.Series(rec, series) if err != nil { return nil, errors.Wrap(err, "decode series") @@ -185,7 +185,7 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) stats.TotalSeries += len(series) stats.DroppedSeries += len(series) - len(repl) - case RecordSamples: + case record.RecordSamples: samples, err = dec.Samples(rec, samples) if err != nil { return nil, errors.Wrap(err, "decode samples") @@ -203,7 +203,7 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, mint int64) stats.TotalSamples += len(samples) stats.DroppedSamples += len(samples) - len(repl) - case RecordTombstones: + case record.RecordTombstones: tstones, err = dec.Tombstones(rec, tstones) if err != nil { return nil, errors.Wrap(err, "decode deletes") @@ -211,7 +211,7 @@ func Checkpoint(w *wal.WAL, from, to int, keep func(id uint64) bool, 
mint int64) // Drop irrelevant tombstones in place. repl := tstones[:0] for _, s := range tstones { - for _, iv := range s.intervals { + for _, iv := range s.Intervals { if iv.Maxt >= mint { repl = append(repl, s) break diff --git a/checkpoint_test.go b/wal/checkpoint_test.go similarity index 89% rename from checkpoint_test.go rename to wal/checkpoint_test.go index 0779894b..37e52263 100644 --- a/checkpoint_test.go +++ b/wal/checkpoint_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package tsdb +package wal import ( "fmt" @@ -25,8 +25,8 @@ import ( "github.com/pkg/errors" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" - "github.com/prometheus/tsdb/wal" ) func TestLastCheckpoint(t *testing.T) { @@ -37,7 +37,7 @@ func TestLastCheckpoint(t *testing.T) { }() _, _, err = LastCheckpoint(dir) - testutil.Equals(t, ErrNotFound, err) + testutil.Equals(t, record.ErrNotFound, err) testutil.Ok(t, os.MkdirAll(filepath.Join(dir, "checkpoint.0000"), 0777)) s, k, err := LastCheckpoint(dir) @@ -94,18 +94,18 @@ func TestCheckpoint(t *testing.T) { testutil.Ok(t, os.RemoveAll(dir)) }() - var enc RecordEncoder + var enc record.RecordEncoder // Create a dummy segment to bump the initial number. - seg, err := wal.CreateSegment(dir, 100) + seg, err := CreateSegment(dir, 100) testutil.Ok(t, err) testutil.Ok(t, seg.Close()) // Manually create checkpoint for 99 and earlier. - w, err := wal.New(nil, nil, filepath.Join(dir, "checkpoint.0099"), compress) + w, err := New(nil, nil, filepath.Join(dir, "checkpoint.0099"), compress) testutil.Ok(t, err) // Add some data we expect to be around later. 
- err = w.Log(enc.Series([]RefSeries{ + err = w.Log(enc.Series([]record.RefSeries{ {Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")}, {Ref: 1, Labels: labels.FromStrings("a", "b", "c", "1")}, }, nil)) @@ -113,7 +113,7 @@ func TestCheckpoint(t *testing.T) { testutil.Ok(t, w.Close()) // Start a WAL and write records to it as usual. - w, err = wal.NewSize(nil, nil, dir, 64*1024, compress) + w, err = NewSize(nil, nil, dir, 64*1024, compress) testutil.Ok(t, err) var last int64 @@ -125,7 +125,7 @@ func TestCheckpoint(t *testing.T) { } // Write some series initially. if i == 0 { - b := enc.Series([]RefSeries{ + b := enc.Series([]record.RefSeries{ {Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")}, {Ref: 3, Labels: labels.FromStrings("a", "b", "c", "3")}, {Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")}, @@ -136,7 +136,7 @@ func TestCheckpoint(t *testing.T) { // Write samples until the WAL has enough segments. // Make them have drifting timestamps within a record to see that they // get filtered properly. 
- b := enc.Samples([]RefSample{ + b := enc.Samples([]record.RefSample{ {Ref: 0, T: last, V: float64(i)}, {Ref: 1, T: last + 10000, V: float64(i)}, {Ref: 2, T: last + 20000, V: float64(i)}, @@ -161,22 +161,22 @@ func TestCheckpoint(t *testing.T) { testutil.Equals(t, 1, len(files)) testutil.Equals(t, "checkpoint.000106", files[0]) - sr, err := wal.NewSegmentsReader(filepath.Join(dir, "checkpoint.000106")) + sr, err := NewSegmentsReader(filepath.Join(dir, "checkpoint.000106")) testutil.Ok(t, err) defer sr.Close() - var dec RecordDecoder - var series []RefSeries - r := wal.NewReader(sr) + var dec record.RecordDecoder + var series []record.RefSeries + r := NewReader(sr) for r.Next() { rec := r.Record() switch dec.Type(rec) { - case RecordSeries: + case record.RecordSeries: series, err = dec.Series(rec, series) testutil.Ok(t, err) - case RecordSamples: + case record.RecordSamples: samples, err := dec.Samples(rec, nil) testutil.Ok(t, err) for _, s := range samples { @@ -185,7 +185,7 @@ func TestCheckpoint(t *testing.T) { } } testutil.Ok(t, r.Err()) - testutil.Equals(t, []RefSeries{ + testutil.Equals(t, []record.RefSeries{ {Ref: 0, Labels: labels.FromStrings("a", "b", "c", "0")}, {Ref: 2, Labels: labels.FromStrings("a", "b", "c", "2")}, {Ref: 4, Labels: labels.FromStrings("a", "b", "c", "4")}, @@ -201,7 +201,7 @@ func TestCheckpointNoTmpFolderAfterError(t *testing.T) { defer func() { testutil.Ok(t, os.RemoveAll(dir)) }() - w, err := wal.NewSize(nil, nil, dir, 64*1024, false) + w, err := NewSize(nil, nil, dir, 64*1024, false) testutil.Ok(t, err) testutil.Ok(t, w.Log([]byte{99})) w.Close() diff --git a/wal/reader_test.go b/wal/reader_test.go index 96d15225..0bb0cb13 100644 --- a/wal/reader_test.go +++ b/wal/reader_test.go @@ -41,7 +41,7 @@ type reader interface { Offset() int64 } -type record struct { +type rec struct { t recType b []byte } @@ -59,13 +59,13 @@ var readerConstructors = map[string]func(io.Reader) reader{ var data = make([]byte, 100000) var testReaderCases = 
[]struct { - t []record + t []rec exp [][]byte fail bool }{ // Sequence of valid records. { - t: []record{ + t: []rec{ {recFull, data[0:200]}, {recFirst, data[200:300]}, {recLast, data[300:400]}, @@ -89,7 +89,7 @@ var testReaderCases = []struct { }, // Exactly at the limit of one page minus the header size { - t: []record{ + t: []rec{ {recFull, data[0 : pageSize-recordHeaderSize]}, }, exp: [][]byte{ @@ -99,7 +99,7 @@ var testReaderCases = []struct { // More than a full page, this exceeds our buffer and can never happen // when written by the WAL. { - t: []record{ + t: []rec{ {recFull, data[0 : pageSize+1]}, }, fail: true, @@ -108,7 +108,7 @@ var testReaderCases = []struct { // NB currently the non-live reader succeeds on this. I think this is a bug. // but we've seen it in production. { - t: []record{ + t: []rec{ {recFull, data[:pageSize/2]}, {recFull, data[:pageSize/2]}, }, @@ -119,22 +119,22 @@ var testReaderCases = []struct { }, // Invalid orders of record types. { - t: []record{{recMiddle, data[:200]}}, + t: []rec{{recMiddle, data[:200]}}, fail: true, }, { - t: []record{{recLast, data[:200]}}, + t: []rec{{recLast, data[:200]}}, fail: true, }, { - t: []record{ + t: []rec{ {recFirst, data[:200]}, {recFull, data[200:400]}, }, fail: true, }, { - t: []record{ + t: []rec{ {recFirst, data[:100]}, {recMiddle, data[100:200]}, {recFull, data[200:400]}, @@ -143,7 +143,7 @@ var testReaderCases = []struct { }, // Non-zero data after page termination. { - t: []record{ + t: []rec{ {recFull, data[:100]}, {recPageTerm, append(make([]byte, pageSize-recordHeaderSize-102), 1)}, }, diff --git a/wal/wal_watcher.go b/wal/wal_watcher.go new file mode 100644 index 00000000..f9f7776c --- /dev/null +++ b/wal/wal_watcher.go @@ -0,0 +1,556 @@ +// Copyright 2018 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package wal + +import ( + "fmt" + "io" + "math" + "os" + "path" + "sort" + "strconv" + "strings" + "time" + + "github.com/go-kit/kit/log" + "github.com/go-kit/kit/log/level" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/pkg/timestamp" + "github.com/prometheus/tsdb/fileutil" + "github.com/prometheus/tsdb/record" +) + +const ( + readPeriod = 10 * time.Millisecond + checkpointPeriod = 5 * time.Second + segmentCheckPeriod = 100 * time.Millisecond + consumer = "consumer" +) + +var ( + watcherRecordsRead = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "records_read_total", + Help: "Number of records read by the WAL watcher from the WAL.", + }, + []string{consumer, "type"}, + ) + watcherRecordDecodeFails = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "record_decode_failures_total", + Help: "Number of records read by the WAL watcher that resulted in an error when decoding.", + }, + []string{consumer}, + ) + watcherSamplesSentPreTailing = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "samples_sent_pre_tailing_total", + Help: "Number of sample records read by the WAL watcher and sent to remote write during replay of existing WAL.", + }, + []string{consumer}, + ) + watcherCurrentSegment = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "prometheus", + Subsystem: 
"wal_watcher", + Name: "current_segment", + Help: "Current segment the WAL watcher is reading records from.", + }, + []string{consumer}, + ) +) + +func init() { + prometheus.MustRegister(watcherRecordsRead) + prometheus.MustRegister(watcherRecordDecodeFails) + prometheus.MustRegister(watcherSamplesSentPreTailing) + prometheus.MustRegister(watcherCurrentSegment) +} + +type writeTo interface { + Append([]record.RefSample) bool + StoreSeries([]record.RefSeries, int) + SeriesReset(int) +} + +// WALWatcher watches the TSDB WAL for a given WriteTo. +type WALWatcher struct { + name string + writer writeTo + logger log.Logger + walDir string + lastCheckpoint string + + startTime int64 + + recordsReadMetric *prometheus.CounterVec + recordDecodeFailsMetric prometheus.Counter + samplesSentPreTailing prometheus.Counter + currentSegmentMetric prometheus.Gauge + + quit chan struct{} + done chan struct{} + + // For testing, stop when we hit this segment. + maxSegment int +} + +// NewWALWatcher creates a new WAL watcher for a given WriteTo. +func NewWALWatcher(logger log.Logger, name string, writer writeTo, walDir string) *WALWatcher { + if logger == nil { + logger = log.NewNopLogger() + } + return &WALWatcher{ + logger: logger, + writer: writer, + walDir: path.Join(walDir, "wal"), + name: name, + quit: make(chan struct{}), + done: make(chan struct{}), + + maxSegment: -1, + } +} + +func (w *WALWatcher) setMetrics() { + // Setup the WAL Watchers metrics. We do this here rather than in the + // constructor because of the ordering of creating Queue Managers's, + // stopping them, and then starting new ones in storage/remote/storage.go ApplyConfig. 
+ w.recordsReadMetric = watcherRecordsRead.MustCurryWith(prometheus.Labels{consumer: w.name})
+ w.recordDecodeFailsMetric = watcherRecordDecodeFails.WithLabelValues(w.name)
+ w.samplesSentPreTailing = watcherSamplesSentPreTailing.WithLabelValues(w.name)
+ w.currentSegmentMetric = watcherCurrentSegment.WithLabelValues(w.name)
+}
+
+// Start the WALWatcher.
+func (w *WALWatcher) Start() {
+ w.setMetrics()
+ level.Info(w.logger).Log("msg", "starting WAL watcher", "queue", w.name)
+
+ go w.loop()
+}
+
+// Stop the WALWatcher.
+func (w *WALWatcher) Stop() {
+ close(w.quit)
+ <-w.done
+
+ // Records read metric has series and samples.
+ watcherRecordsRead.DeleteLabelValues(w.name, "series")
+ watcherRecordsRead.DeleteLabelValues(w.name, "samples")
+ watcherRecordDecodeFails.DeleteLabelValues(w.name)
+ watcherSamplesSentPreTailing.DeleteLabelValues(w.name)
+ watcherCurrentSegment.DeleteLabelValues(w.name)
+
+ level.Info(w.logger).Log("msg", "WAL watcher stopped", "queue", w.name)
+}
+
+func (w *WALWatcher) loop() {
+ defer close(w.done)
+
+ // We may encounter failures processing the WAL; we should wait and retry.
+ for !isClosed(w.quit) {
+ w.startTime = timestamp.FromTime(time.Now())
+ if err := w.run(); err != nil {
+ level.Error(w.logger).Log("msg", "error tailing WAL", "err", err)
+ }
+
+ select {
+ case <-w.quit:
+ return
+ case <-time.After(5 * time.Second):
+ }
+ }
+}
+
+func (w *WALWatcher) run() error {
+ _, lastSegment, err := w.firstAndLast()
+ if err != nil {
+ return errors.Wrap(err, "wal.Segments")
+ }
+
+ // Backfill from the checkpoint first if it exists. 
+ lastCheckpoint, checkpointIndex, err := LastCheckpoint(w.walDir) + if err != nil && err != record.ErrNotFound { + return errors.Wrap(err, "LastCheckpoint") + } + + if err == nil { + if err = w.readCheckpoint(lastCheckpoint); err != nil { + return errors.Wrap(err, "readCheckpoint") + } + } + w.lastCheckpoint = lastCheckpoint + + currentSegment, err := w.findSegmentForIndex(checkpointIndex) + if err != nil { + return err + } + + level.Debug(w.logger).Log("msg", "tailing WAL", "lastCheckpoint", lastCheckpoint, "checkpointIndex", checkpointIndex, "currentSegment", currentSegment, "lastSegment", lastSegment) + for !isClosed(w.quit) { + w.currentSegmentMetric.Set(float64(currentSegment)) + level.Debug(w.logger).Log("msg", "processing segment", "currentSegment", currentSegment) + + // On start, after reading the existing WAL for series records, we have a pointer to what is the latest segment. + // On subsequent calls to this function, currentSegment will have been incremented and we should open that segment. + if err := w.watch(currentSegment, currentSegment >= lastSegment); err != nil { + return err + } + + // For testing: stop when you hit a specific segment. + if currentSegment == w.maxSegment { + return nil + } + + currentSegment++ + } + + return nil +} + +// findSegmentForIndex finds the first segment greater than or equal to index. +func (w *WALWatcher) findSegmentForIndex(index int) (int, error) { + refs, err := w.segments(w.walDir) + if err != nil { + return -1, nil + } + + for _, r := range refs { + if r >= index { + return r, nil + } + } + + return -1, errors.New("failed to find segment for index") +} + +func (w *WALWatcher) firstAndLast() (int, int, error) { + refs, err := w.segments(w.walDir) + if err != nil { + return -1, -1, nil + } + + if len(refs) == 0 { + return -1, -1, nil + } + return refs[0], refs[len(refs)-1], nil +} + +// Copied from tsdb/wal/wal.go so we do not have to open a WAL. 
+// Plan is to move WAL watcher to TSDB and dedupe these implementations. +func (w *WALWatcher) segments(dir string) ([]int, error) { + files, err := fileutil.ReadDir(dir) + if err != nil { + return nil, err + } + + var refs []int + var last int + for _, fn := range files { + k, err := strconv.Atoi(fn) + if err != nil { + continue + } + if len(refs) > 0 && k > last+1 { + return nil, errors.New("segments are not sequential") + } + refs = append(refs, k) + last = k + } + sort.Ints(refs) + + return refs, nil +} + +// Use tail true to indicate that the reader is currently on a segment that is +// actively being written to. If false, assume it's a full segment and we're +// replaying it on start to cache the series records. +func (w *WALWatcher) watch(segmentNum int, tail bool) error { + segment, err := OpenReadSegment(SegmentName(w.walDir, segmentNum)) + if err != nil { + return err + } + defer segment.Close() + + reader := NewLiveReader(w.logger, prometheus.DefaultRegisterer, segment) + + readTicker := time.NewTicker(readPeriod) + defer readTicker.Stop() + + checkpointTicker := time.NewTicker(checkpointPeriod) + defer checkpointTicker.Stop() + + segmentTicker := time.NewTicker(segmentCheckPeriod) + defer segmentTicker.Stop() + + // If we're replaying the segment we need to know the size of the file to know + // when to return from watch and move on to the next segment. + size := int64(math.MaxInt64) + if !tail { + segmentTicker.Stop() + checkpointTicker.Stop() + var err error + size, err = getSegmentSize(w.walDir, segmentNum) + if err != nil { + return errors.Wrap(err, "getSegmentSize") + } + } + + for { + select { + case <-w.quit: + return nil + + case <-checkpointTicker.C: + // Periodically check if there is a new checkpoint so we can garbage + // collect labels. As this is considered an optimisation, we ignore + // errors during checkpoint processing. 
+ if err := w.garbageCollectSeries(segmentNum); err != nil {
+ level.Warn(w.logger).Log("msg", "error process checkpoint", "err", err)
+ }
+
+ case <-segmentTicker.C:
+ _, last, err := w.firstAndLast()
+ if err != nil {
+ return errors.Wrap(err, "segments")
+ }
+
+ // Check if new segments exist.
+ if last <= segmentNum {
+ continue
+ }
+
+ err = w.readSegment(reader, segmentNum, tail)
+
+ // Ignore errors reading to end of segment whilst replaying the WAL.
+ if !tail {
+ if err != nil && err != io.EOF {
+ level.Warn(w.logger).Log("msg", "ignoring error reading to end of segment, may have dropped data", "err", err)
+ } else if reader.Offset() != size {
+ level.Warn(w.logger).Log("msg", "expected to have read whole segment, may have dropped data", "segment", segmentNum, "read", reader.Offset(), "size", size)
+ }
+ return nil
+ }
+
+ // Otherwise, when we are tailing, non-EOFs are fatal.
+ if err != io.EOF {
+ return err
+ }
+
+ return nil
+
+ case <-readTicker.C:
+ err = w.readSegment(reader, segmentNum, tail)
+
+ // Ignore all errors reading to end of segment whilst replaying the WAL.
+ if !tail {
+ if err != nil && err != io.EOF {
+ level.Warn(w.logger).Log("msg", "ignoring error reading to end of segment, may have dropped data", "segment", segmentNum, "err", err)
+ } else if reader.Offset() != size {
+ level.Warn(w.logger).Log("msg", "expected to have read whole segment, may have dropped data", "segment", segmentNum, "read", reader.Offset(), "size", size)
+ }
+ return nil
+ }
+
+ // Otherwise, when we are tailing, non-EOFs are fatal. 
+ if err != io.EOF { + return err + } + } + } +} + +func (w *WALWatcher) garbageCollectSeries(segmentNum int) error { + dir, _, err := LastCheckpoint(w.walDir) + if err != nil && err != record.ErrNotFound { + return errors.Wrap(err, "LastCheckpoint") + } + + if dir == "" || dir == w.lastCheckpoint { + return nil + } + w.lastCheckpoint = dir + + index, err := checkpointNum(dir) + if err != nil { + return errors.Wrap(err, "error parsing checkpoint filename") + } + + if index >= segmentNum { + level.Debug(w.logger).Log("msg", "current segment is behind the checkpoint, skipping reading of checkpoint", "current", fmt.Sprintf("%08d", segmentNum), "checkpoint", dir) + return nil + } + + level.Debug(w.logger).Log("msg", "new checkpoint detected", "new", dir, "currentSegment", segmentNum) + + if err = w.readCheckpoint(dir); err != nil { + return errors.Wrap(err, "readCheckpoint") + } + + // Clear series with a checkpoint or segment index # lower than the checkpoint we just read. + w.writer.SeriesReset(index) + return nil +} + +func (w *WALWatcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { + var ( + dec record.RecordDecoder + series []record.RefSeries + samples []record.RefSample + ) + + for r.Next() && !isClosed(w.quit) { + rec := r.Record() + w.recordsReadMetric.WithLabelValues(recordType(dec.Type(rec))).Inc() + + switch dec.Type(rec) { + case record.RecordSeries: + series, err := dec.Series(rec, series[:0]) + if err != nil { + w.recordDecodeFailsMetric.Inc() + return err + } + w.writer.StoreSeries(series, segmentNum) + + case record.RecordSamples: + // If we're not tailing a segment we can ignore any samples records we see. + // This speeds up replay of the WAL by > 10x. 
+ if !tail { + break + } + samples, err := dec.Samples(rec, samples[:0]) + if err != nil { + w.recordDecodeFailsMetric.Inc() + return err + } + var send []record.RefSample + for _, s := range samples { + if s.T > w.startTime { + send = append(send, s) + } + } + if len(send) > 0 { + // Blocks until the sample is sent to all remote write endpoints or closed (because enqueue blocks). + w.writer.Append(send) + } + + case record.RecordTombstones: + // noop + case record.RecordInvalid: + return errors.New("invalid record") + + default: + w.recordDecodeFailsMetric.Inc() + return errors.New("unknown TSDB record type") + } + } + return r.Err() +} + +func recordType(rt record.RecordType) string { + switch rt { + case record.RecordInvalid: + return "invalid" + case record.RecordSeries: + return "series" + case record.RecordSamples: + return "samples" + case record.RecordTombstones: + return "tombstones" + default: + return "unknown" + } +} + +// Read all the series records from a Checkpoint directory. +func (w *WALWatcher) readCheckpoint(checkpointDir string) error { + level.Debug(w.logger).Log("msg", "reading checkpoint", "dir", checkpointDir) + index, err := checkpointNum(checkpointDir) + if err != nil { + return errors.Wrap(err, "checkpointNum") + } + + // Ensure we read the whole contents of every segment in the checkpoint dir. 
+ segs, err := w.segments(checkpointDir) + if err != nil { + return errors.Wrap(err, "Unable to get segments checkpoint dir") + } + for _, seg := range segs { + size, err := getSegmentSize(checkpointDir, seg) + if err != nil { + return errors.Wrap(err, "getSegmentSize") + } + + sr, err := OpenReadSegment(SegmentName(checkpointDir, seg)) + if err != nil { + return errors.Wrap(err, "unable to open segment") + } + defer sr.Close() + + r := NewLiveReader(w.logger, prometheus.DefaultRegisterer, sr) + if err := w.readSegment(r, index, false); err != io.EOF && err != nil { + return errors.Wrap(err, "readSegment") + } + + if r.Offset() != size { + return fmt.Errorf("readCheckpoint wasn't able to read all data from the checkpoint %s/%08d, size: %d, totalRead: %d", checkpointDir, seg, size, r.Offset()) + } + } + + level.Debug(w.logger).Log("msg", "read series references from checkpoint", "checkpoint", checkpointDir) + return nil +} + +func checkpointNum(dir string) (int, error) { + // Checkpoint dir names are in the format checkpoint.000001 + chunks := strings.Split(dir, ".") + if len(chunks) != 2 { + return 0, errors.Errorf("invalid checkpoint dir string: %s", dir) + } + + result, err := strconv.Atoi(chunks[1]) + if err != nil { + return 0, errors.Errorf("invalid checkpoint dir string: %s", dir) + } + + return result, nil +} + +// Get size of segment. 
+func getSegmentSize(dir string, index int) (int64, error) { + i := int64(-1) + fi, err := os.Stat(SegmentName(dir, index)) + if err == nil { + i = fi.Size() + } + return i, err +} + +func isClosed(c chan struct{}) bool { + select { + case <-c: + return true + default: + return false + } +} diff --git a/wal/wal_watcher_test.go b/wal/wal_watcher_test.go new file mode 100644 index 00000000..e8e1d4b7 --- /dev/null +++ b/wal/wal_watcher_test.go @@ -0,0 +1,509 @@ +// Copyright 2018 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package wal + +import ( + "fmt" + "io/ioutil" + "math/rand" + "os" + "path" + "sync" + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/testutil" +) + +var defaultRetryInterval = 100 * time.Millisecond +var defaultRetries = 100 + +// retry executes f() n times at each interval until it returns true. 
+func retry(t *testing.T, interval time.Duration, n int, f func() bool) { + t.Helper() + ticker := time.NewTicker(interval) + for i := 0; i <= n; i++ { + if f() { + return + } + <-ticker.C + } + ticker.Stop() + t.Logf("function returned false") +} + +type writeToMock struct { + samplesAppended int + seriesLock sync.Mutex + seriesSegmentIndexes map[uint64]int +} + +func (wtm *writeToMock) Append(s []record.RefSample) bool { + wtm.samplesAppended += len(s) + return true +} + +func (wtm *writeToMock) StoreSeries(series []record.RefSeries, index int) { + wtm.seriesLock.Lock() + defer wtm.seriesLock.Unlock() + for _, s := range series { + wtm.seriesSegmentIndexes[s.Ref] = index + } +} + +func (wtm *writeToMock) SeriesReset(index int) { + // Check for series that are in segments older than the checkpoint + // that were not also present in the checkpoint. + wtm.seriesLock.Lock() + defer wtm.seriesLock.Unlock() + for k, v := range wtm.seriesSegmentIndexes { + if v < index { + delete(wtm.seriesSegmentIndexes, k) + } + } +} + +func (wtm *writeToMock) checkNumLabels() int { + wtm.seriesLock.Lock() + defer wtm.seriesLock.Unlock() + return len(wtm.seriesSegmentIndexes) +} + +func newWriteToMock() *writeToMock { + return &writeToMock{ + seriesSegmentIndexes: make(map[uint64]int), + } +} + +func TestTailSamples(t *testing.T) { + pageSize := 32 * 1024 + const seriesCount = 10 + const samplesCount = 250 + now := time.Now() + + dir, err := ioutil.TempDir("", "readCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.RecordEncoder{} + w, err := NewSize(nil, prometheus.DefaultRegisterer, wdir, 128*pageSize, false) + testutil.Ok(t, err) + + // Write to the initial segment then checkpoint. 
+ for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(now.UnixNano()) + 1, + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + + // Start read after checkpoint, no more data written. + first, last, err := w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWALWatcher(nil, "", wt, dir) + watcher.startTime = now.UnixNano() + + // Set the Watcher's metrics so they're not nil pointers. + watcher.setMetrics() + for i := first; i <= last; i++ { + segment, err := OpenReadSegment(SegmentName(watcher.walDir, i)) + testutil.Ok(t, err) + defer segment.Close() + + reader := NewLiveReader(nil, prometheus.DefaultRegisterer, segment) + // Use tail true so we can ensure we got the right number of samples. 
+ watcher.readSegment(reader, i, true) + } + + expectedSeries := seriesCount + expectedSamples := seriesCount * samplesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expectedSeries + }) + testutil.Equals(t, expectedSeries, wt.checkNumLabels()) + testutil.Equals(t, expectedSamples, wt.samplesAppended) +} + +func TestReadToEndNoCheckpoint(t *testing.T) { + pageSize := 32 * 1024 + const seriesCount = 10 + const samplesCount = 250 + + dir, err := ioutil.TempDir("", "readToEnd_noCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + w, err := NewSize(nil, nil, wdir, 128*pageSize, false) + testutil.Ok(t, err) + + var recs [][]byte + + enc := record.RecordEncoder{} + + for i := 0; i < seriesCount; i++ { + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(i), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + recs = append(recs, series) + for j := 0; j < samplesCount; j++ { + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(j), + T: int64(i), + V: float64(i), + }, + }, nil) + + recs = append(recs, sample) + + // Randomly batch up records. + if rand.Intn(4) < 3 { + testutil.Ok(t, w.Log(recs...)) + recs = recs[:0] + } + } + } + testutil.Ok(t, w.Log(recs...)) + + _, _, err = w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWALWatcher(nil, "", wt, dir) + go watcher.Start() + + expected := seriesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expected + }) + watcher.Stop() + testutil.Equals(t, expected, wt.checkNumLabels()) +} + +func TestReadToEndWithCheckpoint(t *testing.T) { + segmentSize := 32 * 1024 + // We need something similar to this # of series and samples + // in order to get enough segments for us to checkpoint. 
+ const seriesCount = 10 + const samplesCount = 250 + + dir, err := ioutil.TempDir("", "readToEnd_withCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.RecordEncoder{} + w, err := NewSize(nil, nil, wdir, segmentSize, false) + testutil.Ok(t, err) + + // Write to the initial segment then checkpoint. + for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + + Checkpoint(w, 0, 1, func(x uint64) bool { return true }, 0) + w.Truncate(1) + + // Write more records after checkpointing. 
+ for i := 0; i < seriesCount; i++ { + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(i), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(j), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + + _, _, err = w.Segments() + testutil.Ok(t, err) + wt := newWriteToMock() + watcher := NewWALWatcher(nil, "", wt, dir) + go watcher.Start() + + expected := seriesCount * 2 + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expected + }) + watcher.Stop() + testutil.Equals(t, expected, wt.checkNumLabels()) +} + +func TestReadCheckpoint(t *testing.T) { + pageSize := 32 * 1024 + const seriesCount = 10 + const samplesCount = 250 + + dir, err := ioutil.TempDir("", "readCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + os.Create(SegmentName(wdir, 30)) + + enc := record.RecordEncoder{} + w, err := NewSize(nil, nil, wdir, 128*pageSize, false) + testutil.Ok(t, err) + + // Write to the initial segment then checkpoint. + for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + Checkpoint(w, 30, 31, func(x uint64) bool { return true }, 0) + w.Truncate(32) + + // Start read after checkpoint, no more data written. 
+ _, _, err = w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWALWatcher(nil, "", wt, dir) + // watcher. + go watcher.Start() + + expectedSeries := seriesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expectedSeries + }) + watcher.Stop() + testutil.Equals(t, expectedSeries, wt.checkNumLabels()) +} + +func TestReadCheckpointMultipleSegments(t *testing.T) { + pageSize := 32 * 1024 + + const segments = 1 + const seriesCount = 20 + const samplesCount = 300 + + dir, err := ioutil.TempDir("", "readCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.RecordEncoder{} + w, err := NewSize(nil, nil, wdir, pageSize, false) + testutil.Ok(t, err) + + // Write a bunch of data. + for i := 0; i < segments; i++ { + for j := 0; j < seriesCount; j++ { + ref := j + (i * 100) + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", j)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for k := 0; k < samplesCount; k++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + } + + // At this point we should have at least 6 segments, lets create a checkpoint dir of the first 5. + checkpointDir := dir + "/wal/checkpoint.000004" + err = os.Mkdir(checkpointDir, 0777) + testutil.Ok(t, err) + for i := 0; i <= 4; i++ { + err := os.Rename(SegmentName(dir+"/wal", i), SegmentName(checkpointDir, i)) + testutil.Ok(t, err) + } + + wt := newWriteToMock() + watcher := NewWALWatcher(nil, "", wt, dir) + watcher.maxSegment = -1 + + // Set the Watcher's metrics so they're not nil pointers. 
+ watcher.setMetrics() + + lastCheckpoint, _, err := LastCheckpoint(watcher.walDir) + testutil.Ok(t, err) + + err = watcher.readCheckpoint(lastCheckpoint) + testutil.Ok(t, err) +} + +func TestCheckpointSeriesReset(t *testing.T) { + segmentSize := 32 * 1024 + // We need something similar to this # of series and samples + // in order to get enough segments for us to checkpoint. + const seriesCount = 20 + const samplesCount = 350 + + dir, err := ioutil.TempDir("", "seriesReset") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.RecordEncoder{} + w, err := NewSize(nil, nil, wdir, segmentSize, false) + testutil.Ok(t, err) + + // Write to the initial segment, then checkpoint later. + for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + + _, _, err = w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWALWatcher(nil, "", wt, dir) + watcher.maxSegment = -1 + go watcher.Start() + + expected := seriesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expected + }) + testutil.Equals(t, seriesCount, wt.checkNumLabels()) + + _, err = Checkpoint(w, 2, 4, func(x uint64) bool { return true }, 0) + testutil.Ok(t, err) + + err = w.Truncate(5) + testutil.Ok(t, err) + + _, cpi, err := LastCheckpoint(path.Join(dir, "wal")) + testutil.Ok(t, err) + err = watcher.garbageCollectSeries(cpi + 1) + testutil.Ok(t, err) + + watcher.Stop() + // If you modify the 
checkpoint and truncate segment #'s run the test to see how + // many series records you end up with and change the last Equals check accordingly + // or modify the Equals to Assert(len(wt.seriesLabels) < seriesCount*10) + testutil.Equals(t, 14, wt.checkNumLabels()) +} diff --git a/wal_test.go b/wal_test.go index 0fed5b41..c9a9168d 100644 --- a/wal_test.go +++ b/wal_test.go @@ -29,6 +29,7 @@ import ( "github.com/go-kit/kit/log" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" "github.com/prometheus/tsdb/wal" ) @@ -95,10 +96,10 @@ func TestSegmentWAL_Truncate(t *testing.T) { w.segmentSize = 10000 for i := 0; i < numMetrics; i += batch { - var rs []RefSeries + var rs []record.RefSeries for j, s := range series[i : i+batch] { - rs = append(rs, RefSeries{Labels: s, Ref: uint64(i+j) + 1}) + rs = append(rs, record.RefSeries{Labels: s, Ref: uint64(i+j) + 1}) } err := w.LogSeries(rs) testutil.Ok(t, err) @@ -125,11 +126,11 @@ func TestSegmentWAL_Truncate(t *testing.T) { err = w.Truncate(1000, keepf) testutil.Ok(t, err) - var expected []RefSeries + var expected []record.RefSeries for i := 1; i <= numMetrics; i++ { if i%2 == 1 || uint64(i) >= boundarySeries { - expected = append(expected, RefSeries{Ref: uint64(i), Labels: series[i-1]}) + expected = append(expected, record.RefSeries{Ref: uint64(i), Labels: series[i-1]}) } } @@ -143,10 +144,10 @@ func TestSegmentWAL_Truncate(t *testing.T) { w, err = OpenSegmentWAL(dir, nil, 0, nil) testutil.Ok(t, err) - var readSeries []RefSeries + var readSeries []record.RefSeries r := w.Reader() - testutil.Ok(t, r.Read(func(s []RefSeries) { + testutil.Ok(t, r.Read(func(s []record.RefSeries) { readSeries = append(readSeries, s...) 
}, nil, nil)) @@ -172,9 +173,9 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { }() var ( - recordedSeries [][]RefSeries - recordedSamples [][]RefSample - recordedDeletes [][]Stone + recordedSeries [][]record.RefSeries + recordedSamples [][]record.RefSample + recordedDeletes [][]record.Stone ) var totalSamples int @@ -190,29 +191,29 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { r := w.Reader() var ( - resultSeries [][]RefSeries - resultSamples [][]RefSample - resultDeletes [][]Stone + resultSeries [][]record.RefSeries + resultSamples [][]record.RefSample + resultDeletes [][]record.Stone ) - serf := func(series []RefSeries) { + serf := func(series []record.RefSeries) { if len(series) > 0 { - clsets := make([]RefSeries, len(series)) + clsets := make([]record.RefSeries, len(series)) copy(clsets, series) resultSeries = append(resultSeries, clsets) } } - smplf := func(smpls []RefSample) { + smplf := func(smpls []record.RefSample) { if len(smpls) > 0 { - csmpls := make([]RefSample, len(smpls)) + csmpls := make([]record.RefSample, len(smpls)) copy(csmpls, smpls) resultSamples = append(resultSamples, csmpls) } } - delf := func(stones []Stone) { + delf := func(stones []record.Stone) { if len(stones) > 0 { - cst := make([]Stone, len(stones)) + cst := make([]record.Stone, len(stones)) copy(cst, stones) resultDeletes = append(resultDeletes, cst) } @@ -228,11 +229,11 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { // Insert in batches and generate different amounts of samples for each. 
for i := 0; i < len(series); i += stepSize { - var samples []RefSample - var stones []Stone + var samples []record.RefSample + var stones []record.Stone for j := 0; j < i*10; j++ { - samples = append(samples, RefSample{ + samples = append(samples, record.RefSample{ Ref: uint64(j % 10000), T: int64(j * 2), V: rand.Float64(), @@ -241,13 +242,13 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { for j := 0; j < i*20; j++ { ts := rand.Int63() - stones = append(stones, Stone{rand.Uint64(), Intervals{{ts, ts + rand.Int63n(10000)}}}) + stones = append(stones, record.Stone{rand.Uint64(), record.Intervals{{ts, ts + rand.Int63n(10000)}}}) } lbls := series[i : i+stepSize] - series := make([]RefSeries, 0, len(series)) + series := make([]record.RefSeries, 0, len(series)) for j, l := range lbls { - series = append(series, RefSeries{ + series = append(series, record.RefSeries{ Ref: uint64(i + j), Labels: l, }) @@ -382,8 +383,8 @@ func TestWALRestoreCorrupted(t *testing.T) { w, err := OpenSegmentWAL(dir, nil, 0, nil) testutil.Ok(t, err) - testutil.Ok(t, w.LogSamples([]RefSample{{T: 1, V: 2}})) - testutil.Ok(t, w.LogSamples([]RefSample{{T: 2, V: 3}})) + testutil.Ok(t, w.LogSamples([]record.RefSample{{T: 1, V: 2}})) + testutil.Ok(t, w.LogSamples([]record.RefSample{{T: 2, V: 3}})) testutil.Ok(t, w.cut()) @@ -392,8 +393,8 @@ func TestWALRestoreCorrupted(t *testing.T) { // Hopefully cut will complete by 2 seconds. time.Sleep(2 * time.Second) - testutil.Ok(t, w.LogSamples([]RefSample{{T: 3, V: 4}})) - testutil.Ok(t, w.LogSamples([]RefSample{{T: 5, V: 6}})) + testutil.Ok(t, w.LogSamples([]record.RefSample{{T: 3, V: 4}})) + testutil.Ok(t, w.LogSamples([]record.RefSample{{T: 5, V: 6}})) testutil.Ok(t, w.Close()) @@ -414,24 +415,24 @@ func TestWALRestoreCorrupted(t *testing.T) { r := w2.Reader() - serf := func(l []RefSeries) { + serf := func(l []record.RefSeries) { testutil.Equals(t, 0, len(l)) } // Weird hack to check order of reads. 
i := 0 - samplf := func(s []RefSample) { + samplf := func(s []record.RefSample) { if i == 0 { - testutil.Equals(t, []RefSample{{T: 1, V: 2}}, s) + testutil.Equals(t, []record.RefSample{{T: 1, V: 2}}, s) i++ } else { - testutil.Equals(t, []RefSample{{T: 99, V: 100}}, s) + testutil.Equals(t, []record.RefSample{{T: 99, V: 100}}, s) } } testutil.Ok(t, r.Read(serf, samplf, nil)) - testutil.Ok(t, w2.LogSamples([]RefSample{{T: 99, V: 100}})) + testutil.Ok(t, w2.LogSamples([]record.RefSample{{T: 99, V: 100}})) testutil.Ok(t, w2.Close()) // We should see the first valid entry and the new one, everything after @@ -482,23 +483,23 @@ func TestMigrateWAL_Fuzz(t *testing.T) { testutil.Ok(t, err) // Write some data. - testutil.Ok(t, oldWAL.LogSeries([]RefSeries{ + testutil.Ok(t, oldWAL.LogSeries([]record.RefSeries{ {Ref: 100, Labels: labels.FromStrings("abc", "def", "123", "456")}, {Ref: 1, Labels: labels.FromStrings("abc", "def2", "1234", "4567")}, })) - testutil.Ok(t, oldWAL.LogSamples([]RefSample{ + testutil.Ok(t, oldWAL.LogSamples([]record.RefSample{ {Ref: 1, T: 100, V: 200}, {Ref: 2, T: 300, V: 400}, })) - testutil.Ok(t, oldWAL.LogSeries([]RefSeries{ + testutil.Ok(t, oldWAL.LogSeries([]record.RefSeries{ {Ref: 200, Labels: labels.FromStrings("xyz", "def", "foo", "bar")}, })) - testutil.Ok(t, oldWAL.LogSamples([]RefSample{ + testutil.Ok(t, oldWAL.LogSamples([]record.RefSample{ {Ref: 3, T: 100, V: 200}, {Ref: 4, T: 300, V: 400}, })) - testutil.Ok(t, oldWAL.LogDeletes([]Stone{ - {ref: 1, intervals: []Interval{{100, 200}}}, + testutil.Ok(t, oldWAL.LogDeletes([]record.Stone{ + {Ref: 1, Intervals: []record.Interval{{100, 200}}}, })) testutil.Ok(t, oldWAL.Close()) @@ -510,8 +511,8 @@ func TestMigrateWAL_Fuzz(t *testing.T) { testutil.Ok(t, err) // We can properly write some new data after migration. 
- var enc RecordEncoder - testutil.Ok(t, w.Log(enc.Samples([]RefSample{ + var enc record.RecordEncoder + testutil.Ok(t, w.Log(enc.Samples([]record.RefSample{ {Ref: 500, T: 1, V: 1}, }, nil))) @@ -523,21 +524,21 @@ func TestMigrateWAL_Fuzz(t *testing.T) { r := wal.NewReader(sr) var res []interface{} - var dec RecordDecoder + var dec record.RecordDecoder for r.Next() { rec := r.Record() switch dec.Type(rec) { - case RecordSeries: + case record.RecordSeries: s, err := dec.Series(rec, nil) testutil.Ok(t, err) res = append(res, s) - case RecordSamples: + case record.RecordSamples: s, err := dec.Samples(rec, nil) testutil.Ok(t, err) res = append(res, s) - case RecordTombstones: + case record.RecordTombstones: s, err := dec.Tombstones(rec, nil) testutil.Ok(t, err) res = append(res, s) @@ -548,17 +549,17 @@ func TestMigrateWAL_Fuzz(t *testing.T) { testutil.Ok(t, r.Err()) testutil.Equals(t, []interface{}{ - []RefSeries{ + []record.RefSeries{ {Ref: 100, Labels: labels.FromStrings("abc", "def", "123", "456")}, {Ref: 1, Labels: labels.FromStrings("abc", "def2", "1234", "4567")}, }, - []RefSample{{Ref: 1, T: 100, V: 200}, {Ref: 2, T: 300, V: 400}}, - []RefSeries{ + []record.RefSample{{Ref: 1, T: 100, V: 200}, {Ref: 2, T: 300, V: 400}}, + []record.RefSeries{ {Ref: 200, Labels: labels.FromStrings("xyz", "def", "foo", "bar")}, }, - []RefSample{{Ref: 3, T: 100, V: 200}, {Ref: 4, T: 300, V: 400}}, - []Stone{{ref: 1, intervals: []Interval{{100, 200}}}}, - []RefSample{{Ref: 500, T: 1, V: 1}}, + []record.RefSample{{Ref: 3, T: 100, V: 200}, {Ref: 4, T: 300, V: 400}}, + []record.Stone{{Ref: 1, Intervals: []record.Interval{{100, 200}}}}, + []record.RefSample{{Ref: 500, T: 1, V: 1}}, }, res) // Migrating an already migrated WAL shouldn't do anything. 
From 643a5c9525f9ec69cccc10e936aafd63eb33ccc6 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Tue, 4 Jun 2019 13:08:57 -0700 Subject: [PATCH 02/16] Copy FromTime function from timestamp package so we don't have to vendor something from Prometheus. Signed-off-by: Callum Styan --- wal/wal_watcher.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/wal/wal_watcher.go b/wal/wal_watcher.go index f9f7776c..b85d1f06 100644 --- a/wal/wal_watcher.go +++ b/wal/wal_watcher.go @@ -28,7 +28,6 @@ import ( "github.com/go-kit/kit/log/level" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/prometheus/pkg/timestamp" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/record" ) @@ -79,6 +78,13 @@ var ( ) ) +// This function is copied from prometheus/prometheus/pkg/timestamp to avoid adding vendor to TSDB repo. + +// FromTime returns a new millisecond timestamp from a time. +func FromTime(t time.Time) int64 { + return t.Unix()*1000 + int64(t.Nanosecond())/int64(time.Millisecond) +} + func init() { prometheus.MustRegister(watcherRecordsRead) prometheus.MustRegister(watcherRecordDecodeFails) @@ -169,7 +175,7 @@ func (w *WALWatcher) loop() { // We may encourter failures processing the WAL; we should wait and retry. for !isClosed(w.quit) { - w.startTime = timestamp.FromTime(time.Now()) + w.startTime = FromTime(time.Now()) if err := w.run(); err != nil { level.Error(w.logger).Log("msg", "error tailing WAL", "err", err) } From b84a29720b223453a13c33b357b2be5988f56e7b Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Tue, 4 Jun 2019 16:56:51 -0700 Subject: [PATCH 03/16] WAL Watcher needs to take in and pass a Registerer to LiveReader. 
Signed-off-by: Callum Styan --- wal/wal_watcher.go | 20 +++++++++++++++++++- wal/wal_watcher_test.go | 14 +++++++------- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/wal/wal_watcher.go b/wal/wal_watcher.go index b85d1f06..aa00acf8 100644 --- a/wal/wal_watcher.go +++ b/wal/wal_watcher.go @@ -105,6 +105,7 @@ type WALWatcher struct { logger log.Logger walDir string lastCheckpoint string + reg prometheus.Registerer startTime int64 @@ -121,12 +122,21 @@ type WALWatcher struct { } // NewWALWatcher creates a new WAL watcher for a given WriteTo. -func NewWALWatcher(logger log.Logger, name string, writer writeTo, walDir string) *WALWatcher { +func NewWALWatcher(logger log.Logger, reg prometheus.Registerer, name string, writer writeTo, walDir string) *WALWatcher { if logger == nil { logger = log.NewNopLogger() } + if reg != nil { + // We can't use MustRegister because WALWatcher's are recreated on config changes within Prometheus. + reg.Register(watcherRecordsRead) + reg.Register(watcherRecordDecodeFails) + reg.Register(watcherSamplesSentPreTailing) + reg.Register(watcherCurrentSegment) + } + return &WALWatcher{ logger: logger, + reg: reg, writer: writer, walDir: path.Join(walDir, "wal"), name: name, @@ -298,7 +308,11 @@ func (w *WALWatcher) watch(segmentNum int, tail bool) error { } defer segment.Close() +<<<<<<< HEAD reader := NewLiveReader(w.logger, prometheus.DefaultRegisterer, segment) +======= + reader := NewLiveReader(w.logger, w.reg, segment) +>>>>>>> WAL Watcher needs to take in and pass a Registerer to LiveReader. readTicker := time.NewTicker(readPeriod) defer readTicker.Stop() @@ -513,7 +527,11 @@ func (w *WALWatcher) readCheckpoint(checkpointDir string) error { } defer sr.Close() +<<<<<<< HEAD r := NewLiveReader(w.logger, prometheus.DefaultRegisterer, sr) +======= + r := NewLiveReader(w.logger, w.reg, sr) +>>>>>>> WAL Watcher needs to take in and pass a Registerer to LiveReader. 
if err := w.readSegment(r, index, false); err != io.EOF && err != nil { return errors.Wrap(err, "readSegment") } diff --git a/wal/wal_watcher_test.go b/wal/wal_watcher_test.go index e8e1d4b7..190bcc17 100644 --- a/wal/wal_watcher_test.go +++ b/wal/wal_watcher_test.go @@ -135,7 +135,7 @@ func TestTailSamples(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, "", wt, dir) + watcher := NewWALWatcher(nil, nil, "", wt, dir) watcher.startTime = now.UnixNano() // Set the Watcher's metrics so they're not nil pointers. @@ -145,7 +145,7 @@ func TestTailSamples(t *testing.T) { testutil.Ok(t, err) defer segment.Close() - reader := NewLiveReader(nil, prometheus.DefaultRegisterer, segment) + reader := NewLiveReader(nil, nil, segment) // Use tail true so we can ensure we got the right number of samples. watcher.readSegment(reader, i, true) } @@ -210,7 +210,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, "", wt, dir) + watcher := NewWALWatcher(nil, nil, "", wt, dir) go watcher.Start() expected := seriesCount @@ -292,7 +292,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { _, _, err = w.Segments() testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, "", wt, dir) + watcher := NewWALWatcher(nil, nil, "", wt, dir) go watcher.Start() expected := seriesCount * 2 @@ -353,7 +353,7 @@ func TestReadCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, "", wt, dir) + watcher := NewWALWatcher(nil, nil, "", wt, dir) // watcher. go watcher.Start() @@ -420,7 +420,7 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { } wt := newWriteToMock() - watcher := NewWALWatcher(nil, "", wt, dir) + watcher := NewWALWatcher(nil, nil, "", wt, dir) watcher.maxSegment = -1 // Set the Watcher's metrics so they're not nil pointers. 
@@ -480,7 +480,7 @@ func TestCheckpointSeriesReset(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, "", wt, dir) + watcher := NewWALWatcher(nil, nil, "", wt, dir) watcher.maxSegment = -1 go watcher.Start() From 6c4df84a0a859048782c4d5ffabf5433c9d53d2c Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 5 Jun 2019 11:42:20 -0700 Subject: [PATCH 04/16] Call Checkpoint in TestReadCheckpointMultipleSegments instead of manually creating a checkpoint dir and renaming files. Signed-off-by: Callum Styan --- wal/wal_watcher_test.go | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/wal/wal_watcher_test.go b/wal/wal_watcher_test.go index 190bcc17..00129a23 100644 --- a/wal/wal_watcher_test.go +++ b/wal/wal_watcher_test.go @@ -410,14 +410,9 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { } } - // At this point we should have at least 6 segments, lets create a checkpoint dir of the first 5. - checkpointDir := dir + "/wal/checkpoint.000004" - err = os.Mkdir(checkpointDir, 0777) - testutil.Ok(t, err) - for i := 0; i <= 4; i++ { - err := os.Rename(SegmentName(dir+"/wal", i), SegmentName(checkpointDir, i)) - testutil.Ok(t, err) - } + Checkpoint(w, 0, 4, func(id uint64) bool { + return true + }, 0) wt := newWriteToMock() watcher := NewWALWatcher(nil, nil, "", wt, dir) From f45385912523788b36ef892772820fcc79018af6 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Fri, 7 Jun 2019 14:26:36 -0700 Subject: [PATCH 05/16] Move tombstones to it's own package. 
Signed-off-by: Callum Styan --- block.go | 24 +++++++------- compact.go | 18 +++++------ compact_test.go | 2 +- db_test.go | 27 ++++++++-------- head.go | 30 ++++++++++++----- head_test.go | 29 +++++++++-------- mocks_test.go | 2 +- querier.go | 27 ++++++++-------- querier_test.go | 39 ++++++++++++----------- record/internal.go | 23 ------------- record/record.go | 9 +++--- record/record_test.go | 21 ++++++------ {record => tombstones}/tombstones.go | 19 ++++++++++- {record => tombstones}/tombstones_test.go | 2 +- wal.go | 29 +++++++++-------- wal/checkpoint.go | 3 +- wal/wal_watcher.go | 8 ----- wal_test.go | 19 +++++------ 18 files changed, 170 insertions(+), 161 deletions(-) rename {record => tombstones}/tombstones.go (92%) rename {record => tombstones}/tombstones_test.go (99%) diff --git a/block.go b/block.go index 516bd00b..b19f7cb6 100644 --- a/block.go +++ b/block.go @@ -32,7 +32,7 @@ import ( "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" ) // IndexWriter serializes the index for a block of series data. @@ -137,7 +137,7 @@ type BlockReader interface { Chunks() (ChunkReader, error) // Tombstones returns a TombstoneReader over the block's deleted data. - Tombstones() (record.TombstoneReader, error) + Tombstones() (tombstones.TombstoneReader, error) // Meta provides meta information about the block reader. 
Meta() BlockMeta @@ -279,7 +279,7 @@ type Block struct { chunkr ChunkReader indexr IndexReader - tombstones record.TombstoneReader + tombstones tombstones.TombstoneReader logger log.Logger @@ -321,7 +321,7 @@ func OpenBlock(logger log.Logger, dir string, pool chunkenc.Pool) (pb *Block, er } closers = append(closers, ir) - tr, sizeTomb, err := record.ReadTombstones(dir) + tr, sizeTomb, err := tombstones.ReadTombstones(dir) if err != nil { return nil, err } @@ -412,7 +412,7 @@ func (pb *Block) Chunks() (ChunkReader, error) { } // Tombstones returns a new TombstoneReader against the block data. -func (pb *Block) Tombstones() (record.TombstoneReader, error) { +func (pb *Block) Tombstones() (tombstones.TombstoneReader, error) { if err := pb.startRead(); err != nil { return nil, err } @@ -483,7 +483,7 @@ func (r blockIndexReader) Close() error { } type blockTombstoneReader struct { - record.TombstoneReader + tombstones.TombstoneReader b *Block } @@ -519,7 +519,7 @@ func (pb *Block) Delete(mint, maxt int64, ms ...labels.Matcher) error { ir := pb.indexr // Choose only valid postings which have chunks in the time-range. - stones := record.NewMemTombstones() + stones := tombstones.NewMemTombstones() var lset labels.Labels var chks []chunks.Meta @@ -535,7 +535,7 @@ Outer: if chk.OverlapsClosedInterval(mint, maxt) { // Delete only until the current values and not beyond. 
tmin, tmax := clampInterval(mint, maxt, chks[0].MinTime, chks[len(chks)-1].MaxTime) - stones.AddInterval(p.At(), record.Interval{tmin, tmax}) + stones.AddInterval(p.At(), tombstones.Interval{tmin, tmax}) continue Outer } } @@ -545,7 +545,7 @@ Outer: return p.Err() } - err = pb.tombstones.Iter(func(id uint64, ivs record.Intervals) error { + err = pb.tombstones.Iter(func(id uint64, ivs tombstones.Intervals) error { for _, iv := range ivs { stones.AddInterval(id, iv) } @@ -557,7 +557,7 @@ Outer: pb.tombstones = stones pb.meta.Stats.NumTombstones = pb.tombstones.Total() - n, err := record.WriteTombstoneFile(pb.logger, pb.dir, pb.tombstones) + n, err := tombstones.WriteTombstoneFile(pb.logger, pb.dir, pb.tombstones) if err != nil { return err } @@ -575,7 +575,7 @@ Outer: func (pb *Block) CleanTombstones(dest string, c Compactor) (*ulid.ULID, error) { numStones := 0 - if err := pb.tombstones.Iter(func(id uint64, ivs record.Intervals) error { + if err := pb.tombstones.Iter(func(id uint64, ivs tombstones.Intervals) error { numStones += len(ivs) return nil }); err != nil { @@ -610,7 +610,7 @@ func (pb *Block) Snapshot(dir string) error { for _, fname := range []string{ metaFilename, indexFilename, - record.TombstoneFilename, + tombstones.TombstoneFilename, } { if err := os.Link(filepath.Join(pb.dir, fname), filepath.Join(blockDir, fname)); err != nil { return errors.Wrapf(err, "create snapshot %s", fname) diff --git a/compact.go b/compact.go index 3e2652fd..01a4bee1 100644 --- a/compact.go +++ b/compact.go @@ -35,7 +35,7 @@ import ( "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" ) // ExponentialBlockRanges returns the time ranges based on the stepSize. @@ -608,7 +608,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe } // Create an empty tombstones file. 
- if _, err := record.WriteTombstoneFile(c.logger, tmp, record.NewMemTombstones()); err != nil { + if _, err := tombstones.WriteTombstoneFile(c.logger, tmp, record.NewMemTombstones()); err != nil { return errors.Wrap(err, "write new tombstones file") } @@ -877,15 +877,15 @@ type compactionSeriesSet struct { p index.Postings index IndexReader chunks ChunkReader - tombstones record.TombstoneReader + tombstones tombstones.TombstoneReader l labels.Labels c []chunks.Meta - intervals record.Intervals + intervals tombstones.Intervals err error } -func newCompactionSeriesSet(i IndexReader, c ChunkReader, t record.TombstoneReader, p index.Postings) *compactionSeriesSet { +func newCompactionSeriesSet(i IndexReader, c ChunkReader, t tombstones.TombstoneReader, p index.Postings) *compactionSeriesSet { return &compactionSeriesSet{ index: i, chunks: c, @@ -915,7 +915,7 @@ func (c *compactionSeriesSet) Next() bool { if len(c.intervals) > 0 { chks := make([]chunks.Meta, 0, len(c.c)) for _, chk := range c.c { - if !(record.Interval{chk.MinTime, chk.MaxTime}.IsSubrange(c.intervals)) { + if !(tombstones.Interval{chk.MinTime, chk.MaxTime}.IsSubrange(c.intervals)) { chks = append(chks, chk) } } @@ -943,7 +943,7 @@ func (c *compactionSeriesSet) Err() error { return c.p.Err() } -func (c *compactionSeriesSet) At() (labels.Labels, []chunks.Meta, record.Intervals) { +func (c *compactionSeriesSet) At() (labels.Labels, []chunks.Meta, tombstones.Intervals) { return c.l, c.c, c.intervals } @@ -953,7 +953,7 @@ type compactionMerger struct { aok, bok bool l labels.Labels c []chunks.Meta - intervals record.Intervals + intervals tombstones.Intervals } func newCompactionMerger(a, b ChunkSeriesSet) (*compactionMerger, error) { @@ -1030,6 +1030,6 @@ func (c *compactionMerger) Err() error { return c.b.Err() } -func (c *compactionMerger) At() (labels.Labels, []chunks.Meta, record.Intervals) { +func (c *compactionMerger) At() (labels.Labels, []chunks.Meta, tombstones.Intervals) { return c.l, c.c, 
c.intervals } diff --git a/compact_test.go b/compact_test.go index bee741e6..fe61da24 100644 --- a/compact_test.go +++ b/compact_test.go @@ -30,8 +30,8 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" + "github.com/prometheus/tsdb/tombstones" ) func TestSplitByRange(t *testing.T) { diff --git a/db_test.go b/db_test.go index 7e1e1b96..66a44661 100644 --- a/db_test.go +++ b/db_test.go @@ -35,6 +35,7 @@ import ( "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" + "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/tsdbutil" "github.com/prometheus/tsdb/wal" ) @@ -244,27 +245,27 @@ func TestDeleteSimple(t *testing.T) { numSamples := int64(10) cases := []struct { - intervals record.Intervals + intervals tombstones.Intervals remaint []int64 }{ { - intervals: record.Intervals{{0, 3}}, + intervals: tombstones.Intervals{{0, 3}}, remaint: []int64{4, 5, 6, 7, 8, 9}, }, { - intervals: record.Intervals{{1, 3}}, + intervals: tombstones.Intervals{{1, 3}}, remaint: []int64{0, 4, 5, 6, 7, 8, 9}, }, { - intervals: record.Intervals{{1, 3}, {4, 7}}, + intervals: tombstones.Intervals{{1, 3}, {4, 7}}, remaint: []int64{0, 8, 9}, }, { - intervals: record.Intervals{{1, 3}, {4, 700}}, + intervals: tombstones.Intervals{{1, 3}, {4, 700}}, remaint: []int64{0}, }, { // This case is to ensure that labels and symbols are deleted. 
- intervals: record.Intervals{{0, 9}}, + intervals: tombstones.Intervals{{0, 9}}, remaint: []int64{}, }, } @@ -562,11 +563,11 @@ func TestDB_SnapshotWithDelete(t *testing.T) { testutil.Ok(t, app.Commit()) cases := []struct { - intervals record.Intervals + intervals tombstones.Intervals remaint []int64 }{ { - intervals: record.Intervals{{1, 3}, {4, 7}}, + intervals: tombstones.Intervals{{1, 3}, {4, 7}}, remaint: []int64{0, 8, 9}, }, } @@ -889,11 +890,11 @@ func TestTombstoneClean(t *testing.T) { testutil.Ok(t, app.Commit()) cases := []struct { - intervals record.Intervals + intervals tombstones.Intervals remaint []int64 }{ { - intervals: record.Intervals{{1, 3}, {4, 7}}, + intervals: tombstones.Intervals{{1, 3}, {4, 7}}, remaint: []int64{0, 8, 9}, }, } @@ -965,7 +966,7 @@ func TestTombstoneClean(t *testing.T) { } for _, b := range db.Blocks() { - testutil.Equals(t, record.NewMemTombstones(), b.tombstones) + testutil.Equals(t, tombstones.NewMemTombstones(), b.tombstones) } } } @@ -991,8 +992,8 @@ func TestTombstoneCleanFail(t *testing.T) { block, err := OpenBlock(nil, blockDir, nil) testutil.Ok(t, err) // Add some some fake tombstones to trigger the compaction. - tomb := record.NewMemTombstones() - tomb.AddInterval(0, record.Interval{0, 1}) + tomb := tombstones.NewMemTombstones() + tomb.AddInterval(0, tombstones.Interval{0, 1}) block.tombstones = tomb db.blocks = append(db.blocks, block) diff --git a/head.go b/head.go index c1c8e6d5..b74b7daa 100644 --- a/head.go +++ b/head.go @@ -34,6 +34,7 @@ import ( "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/wal" ) @@ -44,7 +45,7 @@ var ( // emptyTombstoneReader is a no-op Tombstone Reader. // This is used by head to satisfy the Tombstones() function call. 
- emptyTombstoneReader = record.NewMemTombstones() + emptyTombstoneReader = tombstones.NewMemTombstones() ) // Head handles reads and writes of time series data within a time window. @@ -350,11 +351,20 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { } var ( +<<<<<<< HEAD dec RecordDecoder series []RefSeries samples []RefSample tstones []Stone allStones = newMemTombstones() +======= + dec record.RecordDecoder + series []record.RefSeries + samples []record.RefSample + tstones []tombstones.Stone + allStones = tombstones.NewMemTombstones() + err error +>>>>>>> Move tombstones to it's own package. ) defer func() { if err := allStones.Close(); err != nil { @@ -381,7 +391,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { if !created { // There's already a different ref for this series. multiRefLock.Lock() - multiRef[s.Ref] = series.ref + multiRef[s.Ref] = series.Ref multiRefLock.Unlock() } @@ -468,11 +478,15 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { } wg.Wait() +<<<<<<< HEAD if r.Err() != nil { return errors.Wrap(r.Err(), "read records") } if err := allStones.Iter(func(ref uint64, dranges Intervals) error { +======= + if err := allStones.Iter(func(ref uint64, dranges tombstones.Intervals) error { +>>>>>>> Move tombstones to it's own package. 
return h.chunkRewrite(ref, dranges) }); err != nil { return errors.Wrap(r.Err(), "deleting samples from tombstones") @@ -683,7 +697,7 @@ func (h *rangeHead) Chunks() (ChunkReader, error) { return h.head.chunksRange(h.mint, h.maxt), nil } -func (h *rangeHead) Tombstones() (record.TombstoneReader, error) { +func (h *rangeHead) Tombstones() (tombstones.TombstoneReader, error) { return emptyTombstoneReader, nil } @@ -954,7 +968,7 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { return errors.Wrap(err, "select series") } - var stones []record.Stone + var stones []tombstones.Stone dirty := false for p.Next() { series := h.series.getByID(p.At()) @@ -966,9 +980,9 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { // Delete only until the current values and not beyond. t0, t1 = clampInterval(mint, maxt, t0, t1) if h.wal != nil { - stones = append(stones, record.Stone{p.At(), record.Intervals{{t0, t1}}}) + stones = append(stones, tombstones.Stone{p.At(), tombstones.Intervals{{t0, t1}}}) } - if err := h.chunkRewrite(p.At(), record.Intervals{{t0, t1}}); err != nil { + if err := h.chunkRewrite(p.At(), tombstones.Intervals{{t0, t1}}); err != nil { return errors.Wrap(err, "delete samples") } dirty = true @@ -995,7 +1009,7 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { // chunkRewrite re-writes the chunks which overlaps with deleted ranges // and removes the samples in the deleted ranges. // Chunks is deleted if no samples are left at the end. 
-func (h *Head) chunkRewrite(ref uint64, dranges record.Intervals) (err error) { +func (h *Head) chunkRewrite(ref uint64, dranges tombstones.Intervals) (err error) { if len(dranges) == 0 { return nil } @@ -1087,7 +1101,7 @@ func (h *Head) gc() { } // Tombstones returns a new reader over the head's tombstones -func (h *Head) Tombstones() (record.TombstoneReader, error) { +func (h *Head) Tombstones() (tombstones.TombstoneReader, error) { return emptyTombstoneReader, nil } diff --git a/head_test.go b/head_test.go index 50b66196..f297fcc6 100644 --- a/head_test.go +++ b/head_test.go @@ -32,6 +32,7 @@ import ( "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" + "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/tsdbutil" "github.com/prometheus/tsdb/wal" ) @@ -59,7 +60,7 @@ func populateTestWAL(t testing.TB, w *wal.WAL, recs []interface{}) { testutil.Ok(t, w.Log(enc.Series(v, nil))) case []record.RefSample: testutil.Ok(t, w.Log(enc.Samples(v, nil))) - case []record.Stone: + case []tombstones.Stone: testutil.Ok(t, w.Log(enc.Tombstones(v, nil))) } } @@ -372,27 +373,27 @@ func TestHeadDeleteSimple(t *testing.T) { lblDefault := labels.Label{"a", "b"} cases := []struct { - dranges record.Intervals + dranges tombstones.Intervals smplsExp []sample }{ { - dranges: record.Intervals{{0, 3}}, + dranges: tombstones.Intervals{{0, 3}}, smplsExp: buildSmpls([]int64{4, 5, 6, 7, 8, 9}), }, { - dranges: record.Intervals{{1, 3}}, + dranges: tombstones.Intervals{{1, 3}}, smplsExp: buildSmpls([]int64{0, 4, 5, 6, 7, 8, 9}), }, { - dranges: record.Intervals{{1, 3}, {4, 7}}, + dranges: tombstones.Intervals{{1, 3}, {4, 7}}, smplsExp: buildSmpls([]int64{0, 8, 9}), }, { - dranges: record.Intervals{{1, 3}, {4, 700}}, + dranges: tombstones.Intervals{{1, 3}, {4, 700}}, smplsExp: buildSmpls([]int64{0}), }, { // This case is to ensure that labels and symbols are deleted. 
- dranges: record.Intervals{{0, 9}}, + dranges: tombstones.Intervals{{0, 9}}, smplsExp: buildSmpls([]int64{}), }, } @@ -605,7 +606,7 @@ func TestDeletedSamplesAndSeriesStillInWALAfterCheckpoint(t *testing.T) { series++ case []record.RefSample: samples++ - case []record.Stone: + case []tombstones.Stone: stones++ default: t.Fatalf("unknown record type") @@ -693,18 +694,18 @@ func TestDelete_e2e(t *testing.T) { // Delete a time-range from each-selector. dels := []struct { ms []labels.Matcher - drange record.Intervals + drange tombstones.Intervals }{ { ms: []labels.Matcher{labels.NewEqualMatcher("a", "b")}, - drange: record.Intervals{{300, 500}, {600, 670}}, + drange: tombstones.Intervals{{300, 500}, {600, 670}}, }, { ms: []labels.Matcher{ labels.NewEqualMatcher("a", "b"), labels.NewEqualMatcher("job", "prom-k8s"), }, - drange: record.Intervals{{300, 500}, {100, 670}}, + drange: tombstones.Intervals{{300, 500}, {100, 670}}, }, { ms: []labels.Matcher{ @@ -712,7 +713,7 @@ func TestDelete_e2e(t *testing.T) { labels.NewEqualMatcher("instance", "localhost:9090"), labels.NewEqualMatcher("job", "prometheus"), }, - drange: record.Intervals{{300, 400}, {100, 6700}}, + drange: tombstones.Intervals{{300, 400}, {100, 6700}}, }, // TODO: Add Regexp Matchers. 
} @@ -795,7 +796,7 @@ func boundedSamples(full []tsdbutil.Sample, mint, maxt int64) []tsdbutil.Sample return full } -func deletedSamples(full []tsdbutil.Sample, dranges record.Intervals) []tsdbutil.Sample { +func deletedSamples(full []tsdbutil.Sample, dranges tombstones.Intervals) []tsdbutil.Sample { ds := make([]tsdbutil.Sample, 0, len(full)) Outer: for _, s := range full { @@ -1105,7 +1106,7 @@ func TestWalRepair_DecodingError(t *testing.T) { func(rec []byte) []byte { return rec[:3] }, - enc.Tombstones([]record.Stone{{Ref: 1, Intervals: record.Intervals{}}}, []byte{}), + enc.Tombstones([]tombstones.Stone{{Ref: 1, Intervals: tombstones.Intervals{}}}, []byte{}), 9, 5, }, diff --git a/mocks_test.go b/mocks_test.go index f48ea9aa..44d8e9a5 100644 --- a/mocks_test.go +++ b/mocks_test.go @@ -18,7 +18,7 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" ) type mockIndexWriter struct { diff --git a/querier.go b/querier.go index 2b655106..0e905783 100644 --- a/querier.go +++ b/querier.go @@ -26,6 +26,7 @@ import ( "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" ) // Querier provides querying access over time series data of a fixed @@ -205,7 +206,7 @@ func NewBlockQuerier(b BlockReader, mint, maxt int64) (Querier, error) { type blockQuerier struct { index IndexReader chunks ChunkReader - tombstones record.TombstoneReader + tombstones tombstones.TombstoneReader closed bool @@ -671,7 +672,7 @@ func (s *mergedVerticalSeriesSet) Next() bool { // actual series itself. 
type ChunkSeriesSet interface { Next() bool - At() (labels.Labels, []chunks.Meta, record.Intervals) + At() (labels.Labels, []chunks.Meta, tombstones.Intervals) Err() error } @@ -680,19 +681,19 @@ type ChunkSeriesSet interface { type baseChunkSeries struct { p index.Postings index IndexReader - tombstones record.TombstoneReader + tombstones tombstones.TombstoneReader lset labels.Labels chks []chunks.Meta - intervals record.Intervals + intervals tombstones.Intervals err error } // LookupChunkSeries retrieves all series for the given matchers and returns a ChunkSeriesSet // over them. It drops chunks based on tombstones in the given reader. -func LookupChunkSeries(ir IndexReader, tr record.TombstoneReader, ms ...labels.Matcher) (ChunkSeriesSet, error) { +func LookupChunkSeries(ir IndexReader, tr tombstones.TombstoneReader, ms ...labels.Matcher) (ChunkSeriesSet, error) { if tr == nil { - tr = record.NewMemTombstones() + tr = tombstones.NewMemTombstones() } p, err := PostingsForMatchers(ir, ms...) if err != nil { @@ -705,7 +706,7 @@ func LookupChunkSeries(ir IndexReader, tr record.TombstoneReader, ms ...labels.M }, nil } -func (s *baseChunkSeries) At() (labels.Labels, []chunks.Meta, record.Intervals) { +func (s *baseChunkSeries) At() (labels.Labels, []chunks.Meta, tombstones.Intervals) { return s.lset, s.chks, s.intervals } @@ -741,7 +742,7 @@ func (s *baseChunkSeries) Next() bool { // Only those chunks that are not entirely deleted. 
chks := make([]chunks.Meta, 0, len(s.chks)) for _, chk := range s.chks { - if !(record.Interval{chk.MinTime, chk.MaxTime}.IsSubrange(s.intervals)) { + if !(tombstones.Interval{chk.MinTime, chk.MaxTime}.IsSubrange(s.intervals)) { chks = append(chks, chk) } } @@ -768,10 +769,10 @@ type populatedChunkSeries struct { err error chks []chunks.Meta lset labels.Labels - intervals record.Intervals + intervals tombstones.Intervals } -func (s *populatedChunkSeries) At() (labels.Labels, []chunks.Meta, record.Intervals) { +func (s *populatedChunkSeries) At() (labels.Labels, []chunks.Meta, tombstones.Intervals) { return s.lset, s.chks, s.intervals } @@ -866,7 +867,7 @@ type chunkSeries struct { mint, maxt int64 - intervals record.Intervals + intervals tombstones.Intervals } func (s *chunkSeries) Labels() labels.Labels { @@ -1067,7 +1068,7 @@ type chunkSeriesIterator struct { maxt, mint int64 - intervals record.Intervals + intervals tombstones.Intervals } func newChunkSeriesIterator(cs []chunks.Meta, dranges Intervals, mint, maxt int64) *chunkSeriesIterator { @@ -1169,7 +1170,7 @@ func (it *chunkSeriesIterator) Err() error { type deletedIterator struct { it chunkenc.Iterator - intervals record.Intervals + intervals tombstones.Intervals } func (it *deletedIterator) At() (int64, float64) { diff --git a/querier_test.go b/querier_test.go index a1bdf395..b24ca131 100644 --- a/querier_test.go +++ b/querier_test.go @@ -31,6 +31,7 @@ import ( "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" + "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/tsdbutil" ) @@ -382,7 +383,7 @@ Outer: querier := &blockQuerier{ index: ir, chunks: cr, - tombstones: record.NewMemTombstones(), + tombstones: tombstones.NewMemTombstones(), mint: c.mint, maxt: c.maxt, @@ -429,7 +430,7 @@ func TestBlockQuerierDelete(t *testing.T) { cases := struct { data []seriesSamples - tombstones record.TombstoneReader + tombstones 
tombstones.TombstoneReader queries []query }{ data: []seriesSamples{ @@ -474,10 +475,10 @@ func TestBlockQuerierDelete(t *testing.T) { }, }, }, - tombstones: &record.MemTombstones{IntvlGroups: map[uint64]record.Intervals{ - 1: record.Intervals{{1, 3}}, - 2: record.Intervals{{1, 3}, {6, 10}}, - 3: record.Intervals{{6, 10}}, + tombstones: &tombstones.MemTombstones{IntvlGroups: map[uint64]tombstones.Intervals{ + 1: tombstones.Intervals{{1, 3}}, + 2: tombstones.Intervals{{1, 3}, {6, 10}}, + 3: tombstones.Intervals{{6, 10}}, }}, queries: []query{ { @@ -651,7 +652,7 @@ func TestBaseChunkSeries(t *testing.T) { bcs := &baseChunkSeries{ p: index.NewListPostings(tc.postings), index: mi, - tombstones: record.NewMemTombstones(), + tombstones: tombstones.NewMemTombstones(), } i := 0 @@ -1173,7 +1174,7 @@ func (m *mockChunkSeriesSet) Next() bool { return m.i < len(m.l) } -func (m *mockChunkSeriesSet) At() (labels.Labels, []chunks.Meta, record.Intervals) { +func (m *mockChunkSeriesSet) At() (labels.Labels, []chunks.Meta, tombstones.Intervals) { return m.l[m.i], m.cm[m.i], nil } @@ -1268,18 +1269,18 @@ func TestDeletedIterator(t *testing.T) { } cases := []struct { - r record.Intervals + r tombstones.Intervals }{ - {r: record.Intervals{{1, 20}}}, - {r: record.Intervals{{1, 10}, {12, 20}, {21, 23}, {25, 30}}}, - {r: record.Intervals{{1, 10}, {12, 20}, {20, 30}}}, - {r: record.Intervals{{1, 10}, {12, 23}, {25, 30}}}, - {r: record.Intervals{{1, 23}, {12, 20}, {25, 30}}}, - {r: record.Intervals{{1, 23}, {12, 20}, {25, 3000}}}, - {r: record.Intervals{{0, 2000}}}, - {r: record.Intervals{{500, 2000}}}, - {r: record.Intervals{{0, 200}}}, - {r: record.Intervals{{1000, 20000}}}, + {r: tombstones.Intervals{{1, 20}}}, + {r: tombstones.Intervals{{1, 10}, {12, 20}, {21, 23}, {25, 30}}}, + {r: tombstones.Intervals{{1, 10}, {12, 20}, {20, 30}}}, + {r: tombstones.Intervals{{1, 10}, {12, 23}, {25, 30}}}, + {r: tombstones.Intervals{{1, 23}, {12, 20}, {25, 30}}}, + {r: tombstones.Intervals{{1, 23}, 
{12, 20}, {25, 3000}}}, + {r: tombstones.Intervals{{0, 2000}}}, + {r: tombstones.Intervals{{500, 2000}}}, + {r: tombstones.Intervals{{0, 200}}}, + {r: tombstones.Intervals{{1000, 20000}}}, } for _, c := range cases { diff --git a/record/internal.go b/record/internal.go index 840023c6..dbc166db 100644 --- a/record/internal.go +++ b/record/internal.go @@ -14,8 +14,6 @@ package record import ( "errors" - "hash" - "hash/crc32" "math" "os" "path/filepath" @@ -40,21 +38,6 @@ var ( ErrAmendSample = errors.New("amending sample") ) -// The table gets initialized with sync.Once but may still cause a race -// with any other use of the crc32 package anywhere. Thus we initialize it -// before. -var castagnoliTable *crc32.Table - -func init() { - castagnoliTable = crc32.MakeTable(crc32.Castagnoli) -} - -// NewCRC32 initializes a CRC32 hash with a preconfigured polynomial, so the -// polynomial may be easily changed in one location at a later time, if necessary. -func NewCRC32() hash.Hash32 { - return crc32.New(castagnoliTable) -} - type sample struct { t int64 v float64 @@ -68,12 +51,6 @@ func (s sample) V() float64 { return s.v } -// SizeReader returns the size of the object in bytes. -type SizeReader interface { - // Size returns the size in bytes. - Size() int64 -} - // RefSeries is the series labels with the series ID. type RefSeries struct { Ref uint64 diff --git a/record/record.go b/record/record.go index 887f9275..cf854b1d 100644 --- a/record/record.go +++ b/record/record.go @@ -21,6 +21,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/tsdb/encoding" "github.com/prometheus/tsdb/labels" + "github.com/prometheus/tsdb/tombstones" ) // RecordType represents the data type of a record. @@ -123,16 +124,16 @@ func (d *RecordDecoder) Samples(rec []byte, samples []RefSample) ([]RefSample, e } // Tombstones appends tombstones in rec to the given slice. 
-func (d *RecordDecoder) Tombstones(rec []byte, tstones []Stone) ([]Stone, error) { +func (d *RecordDecoder) Tombstones(rec []byte, tstones []tombstones.Stone) ([]tombstones.Stone, error) { dec := encoding.Decbuf{B: rec} if RecordType(dec.Byte()) != RecordTombstones { return nil, errors.New("invalid record type") } for dec.Len() > 0 && dec.Err() == nil { - tstones = append(tstones, Stone{ + tstones = append(tstones, tombstones.Stone{ Ref: dec.Be64(), - Intervals: Intervals{ + Intervals: tombstones.Intervals{ {Mint: dec.Varint64(), Maxt: dec.Varint64()}, }, }) @@ -193,7 +194,7 @@ func (e *RecordEncoder) Samples(samples []RefSample, b []byte) []byte { } // Tombstones appends the encoded tombstones to b and returns the resulting slice. -func (e *RecordEncoder) Tombstones(tstones []Stone, b []byte) []byte { +func (e *RecordEncoder) Tombstones(tstones []tombstones.Stone, b []byte) []byte { buf := encoding.Encbuf{B: b} buf.PutByte(byte(RecordTombstones)) diff --git a/record/record_test.go b/record/record_test.go index fdc69514..b9705238 100644 --- a/record/record_test.go +++ b/record/record_test.go @@ -21,6 +21,7 @@ import ( "github.com/prometheus/tsdb/encoding" "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/testutil" + "github.com/prometheus/tsdb/tombstones" ) func TestRecord_EncodeDecode(t *testing.T) { @@ -54,23 +55,23 @@ func TestRecord_EncodeDecode(t *testing.T) { // Intervals get split up into single entries. So we don't get back exactly // what we put in. 
- tstones := []Stone{ - {Ref: 123, Intervals: Intervals{ + tstones := []tombstones.Stone{ + {Ref: 123, Intervals: tombstones.Intervals{ {Mint: -1000, Maxt: 1231231}, {Mint: 5000, Maxt: 0}, }}, - {Ref: 13, Intervals: Intervals{ + {Ref: 13, Intervals: tombstones.Intervals{ {Mint: -1000, Maxt: -11}, {Mint: 5000, Maxt: 1000}, }}, } decTstones, err := dec.Tombstones(enc.Tombstones(tstones, nil), nil) testutil.Ok(t, err) - testutil.Equals(t, []Stone{ - {Ref: 123, Intervals: Intervals{{Mint: -1000, Maxt: 1231231}}}, - {Ref: 123, Intervals: Intervals{{Mint: 5000, Maxt: 0}}}, - {Ref: 13, Intervals: Intervals{{Mint: -1000, Maxt: -11}}}, - {Ref: 13, Intervals: Intervals{{Mint: 5000, Maxt: 1000}}}, + testutil.Equals(t, []tombstones.Stone{ + {Ref: 123, Intervals: tombstones.Intervals{{Mint: -1000, Maxt: 1231231}}}, + {Ref: 123, Intervals: tombstones.Intervals{{Mint: 5000, Maxt: 0}}}, + {Ref: 13, Intervals: tombstones.Intervals{{Mint: -1000, Maxt: -11}}}, + {Ref: 13, Intervals: tombstones.Intervals{{Mint: 5000, Maxt: 1000}}}, }, decTstones) } @@ -104,8 +105,8 @@ func TestRecord_Corruputed(t *testing.T) { }) t.Run("Test corrupted tombstone record", func(t *testing.T) { - tstones := []Stone{ - {Ref: 123, Intervals: Intervals{ + tstones := []tombstones.Stone{ + {Ref: 123, Intervals: tombstones.Intervals{ {Mint: -1000, Maxt: 1231231}, {Mint: 5000, Maxt: 0}, }}, diff --git a/record/tombstones.go b/tombstones/tombstones.go similarity index 92% rename from record/tombstones.go rename to tombstones/tombstones.go index 23f62ee7..c655f06d 100644 --- a/record/tombstones.go +++ b/tombstones/tombstones.go @@ -11,11 +11,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package record +package tombstones import ( "encoding/binary" "fmt" + "hash" + "hash/crc32" "io" "io/ioutil" "os" @@ -39,6 +41,21 @@ const ( tombstoneFormatV1 = 1 ) +// The table gets initialized with sync.Once but may still cause a race +// with any other use of the crc32 package anywhere. Thus we initialize it +// before. +var castagnoliTable *crc32.Table + +func init() { + castagnoliTable = crc32.MakeTable(crc32.Castagnoli) +} + +// NewCRC32 initializes a CRC32 hash with a preconfigured polynomial, so the +// polynomial may be easily changed in one location at a later time, if necessary. +func NewCRC32() hash.Hash32 { + return crc32.New(castagnoliTable) +} + // TombstoneReader gives access to tombstone intervals by series reference. type TombstoneReader interface { // Get returns deletion intervals for the series with the given reference. diff --git a/record/tombstones_test.go b/tombstones/tombstones_test.go similarity index 99% rename from record/tombstones_test.go rename to tombstones/tombstones_test.go index 1d8bb8e6..d30eff23 100644 --- a/record/tombstones_test.go +++ b/tombstones/tombstones_test.go @@ -11,7 +11,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package record +package tombstones import ( "io/ioutil" diff --git a/wal.go b/wal.go index 27e4c0ba..08cbedfe 100644 --- a/wal.go +++ b/wal.go @@ -35,6 +35,7 @@ import ( "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/wal" ) @@ -92,7 +93,7 @@ type WAL interface { Reader() WALReader LogSeries([]record.RefSeries) error LogSamples([]record.RefSample) error - LogDeletes([]record.Stone) error + LogDeletes([]tombstones.Stone) error Truncate(mint int64, keep func(uint64) bool) error Close() error } @@ -102,7 +103,7 @@ type WALReader interface { Read( seriesf func([]record.RefSeries), samplesf func([]record.RefSample), - deletesf func([]record.Stone), + deletesf func([]tombstones.Stone), ) error } @@ -228,7 +229,7 @@ type repairingWALReader struct { func (r *repairingWALReader) Read( seriesf func([]record.RefSeries), samplesf func([]record.RefSample), - deletesf func([]record.Stone), + deletesf func([]tombstones.Stone), ) error { err := r.r.Read(seriesf, samplesf, deletesf) if err == nil { @@ -466,7 +467,7 @@ func (w *SegmentWAL) LogSamples(samples []record.RefSample) error { } // LogDeletes write a batch of new deletes to the log. 
-func (w *SegmentWAL) LogDeletes(stones []record.Stone) error { +func (w *SegmentWAL) LogDeletes(stones []tombstones.Stone) error { buf := w.getBuffer() flag := w.encodeDeletes(buf, stones) @@ -811,7 +812,7 @@ func (w *SegmentWAL) encodeSamples(buf *encoding.Encbuf, samples []record.RefSam return walSamplesSimple } -func (w *SegmentWAL) encodeDeletes(buf *encoding.Encbuf, stones []record.Stone) uint8 { +func (w *SegmentWAL) encodeDeletes(buf *encoding.Encbuf, stones []tombstones.Stone) uint8 { for _, s := range stones { for _, iv := range s.Intervals { buf.PutBE64(s.Ref) @@ -859,7 +860,7 @@ func (r *walReader) Err() error { func (r *walReader) Read( seriesf func([]record.RefSeries), samplesf func([]record.RefSample), - deletesf func([]record.Stone), + deletesf func([]tombstones.Stone), ) error { // Concurrency for replaying the WAL is very limited. We at least split out decoding and // processing into separate threads. @@ -890,7 +891,7 @@ func (r *walReader) Read( } //lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty. 
samplePool.Put(v[:0]) - case []record.Stone: + case []tombstones.Stone: if deletesf != nil { deletesf(v) } @@ -954,11 +955,11 @@ func (r *walReader) Read( } } case WALEntryDeletes: - var deletes []record.Stone + var deletes []tombstones.Stone if v := deletePool.Get(); v == nil { - deletes = make([]record.Stone, 0, 512) + deletes = make([]tombstones.Stone, 0, 512) } else { - deletes = v.([]record.Stone) + deletes = v.([]tombstones.Stone) } err = r.decodeDeletes(flag, b, &deletes) @@ -1168,13 +1169,13 @@ func (r *walReader) decodeSamples(flag byte, b []byte, res *[]record.RefSample) return nil } -func (r *walReader) decodeDeletes(flag byte, b []byte, res *[]record.Stone) error { +func (r *walReader) decodeDeletes(flag byte, b []byte, res *[]tombstones.Stone) error { dec := &encoding.Decbuf{B: b} for dec.Len() > 0 && dec.Err() == nil { - *res = append(*res, record.Stone{ + *res = append(*res, tombstones.Stone{ Ref: dec.Be64(), - Intervals: record.Intervals{ + Intervals: tombstones.Intervals{ {Mint: dec.Varint64(), Maxt: dec.Varint64()}, }, }) @@ -1270,7 +1271,7 @@ func MigrateWAL(logger log.Logger, dir string) (err error) { } err = repl.Log(enc.Samples(s, b[:0])) }, - func(s []record.Stone) { + func(s []tombstones.Stone) { if err != nil { return } diff --git a/wal/checkpoint.go b/wal/checkpoint.go index d9595ff3..1e9caa84 100644 --- a/wal/checkpoint.go +++ b/wal/checkpoint.go @@ -28,6 +28,7 @@ import ( tsdb_errors "github.com/prometheus/tsdb/errors" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/record" + "github.com/prometheus/tsdb/tombstones" ) // CheckpointStats returns stats about a created checkpoint. 
@@ -151,7 +152,7 @@ func Checkpoint(w *WAL, from, to int, keep func(id uint64) bool, mint int64) (*C var ( series []record.RefSeries samples []record.RefSample - tstones []record.Stone + tstones []tombstones.Stone dec record.RecordDecoder enc record.RecordEncoder buf []byte diff --git a/wal/wal_watcher.go b/wal/wal_watcher.go index aa00acf8..be57975f 100644 --- a/wal/wal_watcher.go +++ b/wal/wal_watcher.go @@ -308,11 +308,7 @@ func (w *WALWatcher) watch(segmentNum int, tail bool) error { } defer segment.Close() -<<<<<<< HEAD - reader := NewLiveReader(w.logger, prometheus.DefaultRegisterer, segment) -======= reader := NewLiveReader(w.logger, w.reg, segment) ->>>>>>> WAL Watcher needs to take in and pass a Registerer to LiveReader. readTicker := time.NewTicker(readPeriod) defer readTicker.Stop() @@ -527,11 +523,7 @@ func (w *WALWatcher) readCheckpoint(checkpointDir string) error { } defer sr.Close() -<<<<<<< HEAD - r := NewLiveReader(w.logger, prometheus.DefaultRegisterer, sr) -======= r := NewLiveReader(w.logger, w.reg, sr) ->>>>>>> WAL Watcher needs to take in and pass a Registerer to LiveReader. 
if err := w.readSegment(r, index, false); err != io.EOF && err != nil { return errors.Wrap(err, "readSegment") } diff --git a/wal_test.go b/wal_test.go index c9a9168d..c84a899d 100644 --- a/wal_test.go +++ b/wal_test.go @@ -31,6 +31,7 @@ import ( "github.com/prometheus/tsdb/labels" "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" + "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/wal" ) @@ -175,7 +176,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { var ( recordedSeries [][]record.RefSeries recordedSamples [][]record.RefSample - recordedDeletes [][]record.Stone + recordedDeletes [][]tombstones.Stone ) var totalSamples int @@ -193,7 +194,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { var ( resultSeries [][]record.RefSeries resultSamples [][]record.RefSample - resultDeletes [][]record.Stone + resultDeletes [][]tombstones.Stone ) serf := func(series []record.RefSeries) { @@ -211,9 +212,9 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { } } - delf := func(stones []record.Stone) { + delf := func(stones []tombstones.Stone) { if len(stones) > 0 { - cst := make([]record.Stone, len(stones)) + cst := make([]tombstones.Stone, len(stones)) copy(cst, stones) resultDeletes = append(resultDeletes, cst) } @@ -230,7 +231,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { // Insert in batches and generate different amounts of samples for each. 
for i := 0; i < len(series); i += stepSize { var samples []record.RefSample - var stones []record.Stone + var stones []tombstones.Stone for j := 0; j < i*10; j++ { samples = append(samples, record.RefSample{ @@ -242,7 +243,7 @@ func TestSegmentWAL_Log_Restore(t *testing.T) { for j := 0; j < i*20; j++ { ts := rand.Int63() - stones = append(stones, record.Stone{rand.Uint64(), record.Intervals{{ts, ts + rand.Int63n(10000)}}}) + stones = append(stones, tombstones.Stone{rand.Uint64(), tombstones.Intervals{{ts, ts + rand.Int63n(10000)}}}) } lbls := series[i : i+stepSize] @@ -498,8 +499,8 @@ func TestMigrateWAL_Fuzz(t *testing.T) { {Ref: 3, T: 100, V: 200}, {Ref: 4, T: 300, V: 400}, })) - testutil.Ok(t, oldWAL.LogDeletes([]record.Stone{ - {Ref: 1, Intervals: []record.Interval{{100, 200}}}, + testutil.Ok(t, oldWAL.LogDeletes([]tombstones.Stone{ + {Ref: 1, Intervals: []tombstones.Interval{{100, 200}}}, })) testutil.Ok(t, oldWAL.Close()) @@ -558,7 +559,7 @@ func TestMigrateWAL_Fuzz(t *testing.T) { {Ref: 200, Labels: labels.FromStrings("xyz", "def", "foo", "bar")}, }, []record.RefSample{{Ref: 3, T: 100, V: 200}, {Ref: 4, T: 300, V: 400}}, - []record.Stone{{Ref: 1, Intervals: []record.Interval{{100, 200}}}}, + []tombstones.Stone{{Ref: 1, Intervals: []tombstones.Interval{{100, 200}}}}, []record.RefSample{{Ref: 500, T: 1, V: 1}}, }, res) From adfe7f438604105025e7010492ccbe56090458af Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Thu, 20 Jun 2019 14:21:05 -0700 Subject: [PATCH 06/16] Fix some stuff I broke during rebase. 
Signed-off-by: Callum Styan --- compact.go | 4 ++-- go.mod | 1 - go.sum | 2 -- head.go | 19 +++---------------- head_test.go | 36 ++++++++++++++++++------------------ record/internal.go | 12 ++++++------ tombstones/tombstones.go | 12 ++++++------ wal/wal_watcher.go | 5 +++-- 8 files changed, 38 insertions(+), 53 deletions(-) diff --git a/compact.go b/compact.go index 01a4bee1..8fa8edfd 100644 --- a/compact.go +++ b/compact.go @@ -608,7 +608,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe } // Create an empty tombstones file. - if _, err := tombstones.WriteTombstoneFile(c.logger, tmp, record.NewMemTombstones()); err != nil { + if _, err := tombstones.WriteTombstoneFile(c.logger, tmp, tombstones.NewMemTombstones()); err != nil { return errors.Wrap(err, "write new tombstones file") } @@ -769,7 +769,7 @@ func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta, // // TODO think how to avoid the typecasting to verify when it is head block. if _, isHeadChunk := chk.Chunk.(*safeChunk); isHeadChunk && chk.MaxTime >= meta.MaxTime { - dranges = append(dranges, Interval{Mint: meta.MaxTime, Maxt: math.MaxInt64}) + dranges = append(dranges, tombstones.Interval{Mint: meta.MaxTime, Maxt: math.MaxInt64}) } else // Sanity check for disk blocks. 
diff --git a/go.mod b/go.mod index c75e4ed7..ccdd4372 100644 --- a/go.mod +++ b/go.mod @@ -8,7 +8,6 @@ require ( github.com/oklog/ulid v1.3.1 github.com/pkg/errors v0.8.0 github.com/prometheus/client_golang v1.0.0 - github.com/prometheus/prometheus v2.5.0+incompatible golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4 golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5 gopkg.in/alecthomas/kingpin.v2 v2.2.6 diff --git a/go.sum b/go.sum index ad7f9516..e854d810 100644 --- a/go.sum +++ b/go.sum @@ -59,8 +59,6 @@ github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d h1:GoAlyOgbOEIFd github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.2 h1:6LJUbpNm42llc4HRCuvApCSWB/WfhuNo9K98Q9sNGfs= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= -github.com/prometheus/prometheus v2.5.0+incompatible h1:7QPitgO2kOFG8ecuRn9O/4L9+10He72rVRJvMXrE9Hg= -github.com/prometheus/prometheus v2.5.0+incompatible/go.mod h1:oAIUtOny2rjMX0OWN5vPR5/q/twIROJvdqnQKDdil/s= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= diff --git a/head.go b/head.go index b74b7daa..0c2675c8 100644 --- a/head.go +++ b/head.go @@ -351,20 +351,11 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { } var ( -<<<<<<< HEAD - dec RecordDecoder - series []RefSeries - samples []RefSample - tstones []Stone - allStones = newMemTombstones() -======= dec record.RecordDecoder series []record.RefSeries samples []record.RefSample tstones []tombstones.Stone allStones = tombstones.NewMemTombstones() - err error ->>>>>>> Move tombstones to it's own package. 
) defer func() { if err := allStones.Close(); err != nil { @@ -389,7 +380,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { series, created := h.getOrCreateWithID(s.Ref, s.Labels.Hash(), s.Labels) if !created { - // There's already a different ref for this series. + // There's already a different Ref for this series. multiRefLock.Lock() multiRef[s.Ref] = series.Ref multiRefLock.Unlock() @@ -478,15 +469,11 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { } wg.Wait() -<<<<<<< HEAD if r.Err() != nil { return errors.Wrap(r.Err(), "read records") } - if err := allStones.Iter(func(ref uint64, dranges Intervals) error { -======= if err := allStones.Iter(func(ref uint64, dranges tombstones.Intervals) error { ->>>>>>> Move tombstones to it's own package. return h.chunkRewrite(ref, dranges) }); err != nil { return errors.Wrap(r.Err(), "deleting samples from tombstones") @@ -1347,8 +1334,8 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks continue } // Set the head chunks as open (being appended to). - maxTime := c.maxTime - if s.headChunk == c { + maxTime := c.MaxTime + if s.HeadChunk == c { maxTime = math.MaxInt64 } diff --git a/head_test.go b/head_test.go index f297fcc6..7b4caded 100644 --- a/head_test.go +++ b/head_test.go @@ -102,28 +102,28 @@ func TestHead_ReadWAL(t *testing.T) { for _, compress := range []bool{false, true} { t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { entries := []interface{}{ - []RefSeries{ + []record.RefSeries{ {Ref: 10, Labels: labels.FromStrings("a", "1")}, {Ref: 11, Labels: labels.FromStrings("a", "2")}, {Ref: 100, Labels: labels.FromStrings("a", "3")}, }, - []RefSample{ + []record.RefSample{ {Ref: 0, T: 99, V: 1}, {Ref: 10, T: 100, V: 2}, {Ref: 100, T: 100, V: 3}, }, - []RefSeries{ + []record.RefSeries{ {Ref: 50, Labels: labels.FromStrings("a", "4")}, // This series has two refs pointing to it. 
{Ref: 101, Labels: labels.FromStrings("a", "3")}, }, - []RefSample{ + []record.RefSample{ {Ref: 10, T: 101, V: 5}, {Ref: 50, T: 101, V: 6}, {Ref: 101, T: 101, V: 7}, }, - []Stone{ - {ref: 0, intervals: []Interval{{Mint: 99, Maxt: 101}}}, + []tombstones.Stone{ + {Ref: 0, Intervals: []tombstones.Interval{{Mint: 99, Maxt: 101}}}, }, } dir, err := ioutil.TempDir("", "test_read_wal") @@ -148,10 +148,10 @@ func TestHead_ReadWAL(t *testing.T) { s50 := head.series.getByID(50) s100 := head.series.getByID(100) - testutil.Equals(t, labels.FromStrings("a", "1"), s10.lset) - testutil.Equals(t, (*memSeries)(nil), s11) // Series without samples should be garbage colected at head.Init(). - testutil.Equals(t, labels.FromStrings("a", "4"), s50.lset) - testutil.Equals(t, labels.FromStrings("a", "3"), s100.lset) + testutil.Equals(t, labels.FromStrings("a", "1"), s10.Lset) + testutil.Equals(t, (*record.MemSeries)(nil), s11) // Series without samples should be garbage colected at head.Init(). + testutil.Equals(t, labels.FromStrings("a", "4"), s50.Lset) + testutil.Equals(t, labels.FromStrings("a", "3"), s100.Lset) expandChunk := func(c chunkenc.Iterator) (x []sample) { for c.Next() { @@ -328,14 +328,14 @@ func TestHeadDeleteSeriesWithoutSamples(t *testing.T) { for _, compress := range []bool{false, true} { t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { entries := []interface{}{ - []RefSeries{ + []record.RefSeries{ {Ref: 10, Labels: labels.FromStrings("a", "1")}, }, - []RefSample{}, - []RefSeries{ + []record.RefSample{}, + []record.RefSeries{ {Ref: 50, Labels: labels.FromStrings("a", "2")}, }, - []RefSample{ + []record.RefSample{ {Ref: 50, T: 80, V: 1}, {Ref: 50, T: 90, V: 1}, }, @@ -1057,9 +1057,9 @@ func TestHead_LogRollback(t *testing.T) { testutil.Equals(t, 1, len(recs)) - series, ok := recs[0].([]RefSeries) + series, ok := recs[0].([]record.RefSeries) testutil.Assert(t, ok, "expected series record but got %+v", recs[0]) - testutil.Equals(t, []RefSeries{{Ref: 1, 
Labels: labels.FromStrings("a", "b")}}, series) + testutil.Equals(t, []record.RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, series) }) } } @@ -1067,7 +1067,7 @@ func TestHead_LogRollback(t *testing.T) { // TestWalRepair_DecodingError ensures that a repair is run for an error // when decoding a record. func TestWalRepair_DecodingError(t *testing.T) { - var enc RecordEncoder + var enc record.RecordEncoder for name, test := range map[string]struct { corrFunc func(rec []byte) []byte // Func that applies the corruption to a record. rec []byte @@ -1079,7 +1079,7 @@ func TestWalRepair_DecodingError(t *testing.T) { // Do not modify the base record because it is Logged multiple times. res := make([]byte, len(rec)) copy(res, rec) - res[0] = byte(RecordInvalid) + res[0] = byte(record.RecordInvalid) return res }, enc.Series([]record.RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), diff --git a/record/internal.go b/record/internal.go index dbc166db..89a0cc2d 100644 --- a/record/internal.go +++ b/record/internal.go @@ -74,8 +74,8 @@ type MemSeries struct { PendingCommit bool // Whether there are samples waiting to be committed to this series. Chunks []*MemChunk Lset labels.Labels + HeadChunk *MemChunk - headChunk *MemChunk chunkRange int64 firstChunkID int @@ -117,7 +117,7 @@ func (s *MemSeries) cut(mint int64) *MemChunk { MaxTime: math.MinInt64, } s.Chunks = append(s.Chunks, c) - s.headChunk = c + s.HeadChunk = c // Set upper bound on when the next chunk must be started. An earlier timestamp // may be chosen dynamically at a later point. @@ -143,7 +143,7 @@ func (s *MemSeries) ChunksMetas() []chunks.Meta { // and 'chunkRange', like how it would appear after 'newMemSeries(...)'. 
func (s *MemSeries) Reset() { s.Chunks = nil - s.headChunk = nil + s.HeadChunk = nil s.firstChunkID = 0 s.nextAt = math.MinInt64 s.sampleBuf = [4]sample{} @@ -197,9 +197,9 @@ func (s *MemSeries) TruncateChunksBefore(mint int64) (removed int) { s.Chunks = append(s.Chunks[:0], s.Chunks[k:]...) s.firstChunkID += k if len(s.Chunks) == 0 { - s.headChunk = nil + s.HeadChunk = nil } else { - s.headChunk = s.Chunks[len(s.Chunks)-1] + s.HeadChunk = s.Chunks[len(s.Chunks)-1] } return k @@ -270,7 +270,7 @@ func (s *MemSeries) Iterator(id int) chunkenc.Iterator { } func (s *MemSeries) head() *MemChunk { - return s.headChunk + return s.HeadChunk } type MemChunk struct { diff --git a/tombstones/tombstones.go b/tombstones/tombstones.go index c655f06d..8b79632b 100644 --- a/tombstones/tombstones.go +++ b/tombstones/tombstones.go @@ -50,9 +50,9 @@ func init() { castagnoliTable = crc32.MakeTable(crc32.Castagnoli) } -// NewCRC32 initializes a CRC32 hash with a preconfigured polynomial, so the +// newCRC32 initializes a CRC32 hash with a preconfigured polynomial, so the // polynomial may be easily changed in one location at a later time, if necessary. 
-func NewCRC32() hash.Hash32 { +func newCRC32() hash.Hash32 { return crc32.New(castagnoliTable) } @@ -72,7 +72,7 @@ type TombstoneReader interface { } func WriteTombstoneFile(logger log.Logger, dir string, tr TombstoneReader) (int64, error) { - path := filepath.Join(dir, tombstoneFilename) + path := filepath.Join(dir, TombstoneFilename) tmp := path + ".tmp" hash := newCRC32() var size int @@ -151,9 +151,9 @@ type Stone struct { } func ReadTombstones(dir string) (TombstoneReader, int64, error) { - b, err := ioutil.ReadFile(filepath.Join(dir, tombstoneFilename)) + b, err := ioutil.ReadFile(filepath.Join(dir, TombstoneFilename)) if os.IsNotExist(err) { - return newMemTombstones(), 0, nil + return NewMemTombstones(), 0, nil } else if err != nil { return nil, 0, err } @@ -175,7 +175,7 @@ func ReadTombstones(dir string) (TombstoneReader, int64, error) { } // Verify checksum. - hash := NewCRC32() + hash := newCRC32() if _, err := hash.Write(d.Get()); err != nil { return nil, 0, errors.Wrap(err, "write to hash") } diff --git a/wal/wal_watcher.go b/wal/wal_watcher.go index be57975f..bc21994d 100644 --- a/wal/wal_watcher.go +++ b/wal/wal_watcher.go @@ -76,6 +76,7 @@ var ( }, []string{consumer}, ) + lrMetrics = NewLiveReaderMetrics(prometheus.DefaultRegisterer) ) // This function is copied from prometheus/prometheus/pkg/timestamp to avoid adding vendor to TSDB repo. 
@@ -308,7 +309,7 @@ func (w *WALWatcher) watch(segmentNum int, tail bool) error { } defer segment.Close() - reader := NewLiveReader(w.logger, w.reg, segment) + reader := NewLiveReader(w.logger, lrMetrics, segment) readTicker := time.NewTicker(readPeriod) defer readTicker.Stop() @@ -523,7 +524,7 @@ func (w *WALWatcher) readCheckpoint(checkpointDir string) error { } defer sr.Close() - r := NewLiveReader(w.logger, w.reg, sr) + r := NewLiveReader(w.logger, lrMetrics, sr) if err := w.readSegment(r, index, false); err != io.EOF && err != nil { return errors.Wrap(err, "readSegment") } From f2c8171016ce1f3e572d0efa67ff3fbbc5fbb051 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 3 Jul 2019 18:31:48 -0700 Subject: [PATCH 07/16] Keep memSeries in head.go Signed-off-by: Callum Styan --- head.go | 382 +++++++++++++++++++++++++++++++++++++++------ head_test.go | 62 ++++---- querier_test.go | 13 -- record/internal.go | 269 +------------------------------ 4 files changed, 364 insertions(+), 362 deletions(-) diff --git a/head.go b/head.go index 0c2675c8..fa385ca6 100644 --- a/head.go +++ b/head.go @@ -55,6 +55,7 @@ type Head struct { wal *wal.WAL logger log.Logger appendPool sync.Pool + seriesPool sync.Pool bytesPool sync.Pool numSeries uint64 @@ -252,7 +253,7 @@ func (h *Head) processWALSamples( defer close(output) // Mitigate lock contention in getByID. - refSeries := map[uint64]*record.MemSeries{} + refSeries := map[uint64]*memSeries{} mint, maxt := int64(math.MaxInt64), int64(math.MinInt64) @@ -382,7 +383,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { if !created { // There's already a different Ref for this series. 
multiRefLock.Lock() - multiRef[s.Ref] = series.Ref + multiRef[s.Ref] = series.ref multiRefLock.Unlock() } @@ -770,6 +771,7 @@ func (h *Head) appender() *headAppender { mint: math.MaxInt64, maxt: math.MinInt64, samples: h.getAppendBuffer(), + sampleSeries: h.getSeriesBuffer(), } } @@ -793,6 +795,19 @@ func (h *Head) putAppendBuffer(b []record.RefSample) { h.appendPool.Put(b[:0]) } +func (h *Head) getSeriesBuffer() []*memSeries { + b := h.seriesPool.Get() + if b == nil { + return make([]*memSeries, 0, 512) + } + return b.([]*memSeries) +} + +func (h *Head) putSeriesBuffer(b []*memSeries) { + //lint:ignore SA6002 safe to ignore and actually fixing it has some performance penalty. + h.seriesPool.Put(b[:0]) +} + func (h *Head) getBytesBuffer() []byte { b := h.bytesPool.Get() if b == nil { @@ -811,8 +826,9 @@ type headAppender struct { minValidTime int64 // No samples below this timestamp are allowed. mint, maxt int64 - series []record.RefSeries - samples []record.RefSample + series []record.RefSeries + samples []record.RefSample + sampleSeries []*memSeries } func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, error) { @@ -826,11 +842,11 @@ func (a *headAppender) Add(lset labels.Labels, t int64, v float64) (uint64, erro s, created := a.head.getOrCreate(lset.Hash(), lset) if created { a.series = append(a.series, record.RefSeries{ - Ref: s.Ref, + Ref: s.ref, Labels: lset, }) } - return s.Ref, a.AddFast(s.Ref, t, v) + return s.ref, a.AddFast(s.ref, t, v) } func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { @@ -847,7 +863,7 @@ func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { s.Unlock() return err } - s.PendingCommit = true + s.pendingCommit = true s.Unlock() if t < a.mint { @@ -858,11 +874,11 @@ func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { } a.samples = append(a.samples, record.RefSample{ - Ref: ref, - T: t, - V: v, - Series: s, + Ref: ref, + T: t, + V: v, }) + a.sampleSeries = 
append(a.sampleSeries, s) return nil } @@ -899,18 +915,20 @@ func (a *headAppender) log() error { func (a *headAppender) Commit() error { defer a.head.metrics.activeAppenders.Dec() defer a.head.putAppendBuffer(a.samples) + defer a.head.putSeriesBuffer(a.sampleSeries) if err := a.log(); err != nil { return errors.Wrap(err, "write to WAL") } + var series *memSeries total := len(a.samples) - - for _, s := range a.samples { - s.Series.Lock() - ok, chunkCreated := s.Series.Append(s.T, s.V) - s.Series.PendingCommit = false - s.Series.Unlock() + for i, s := range a.samples { + series = a.sampleSeries[i] + series.Lock() + ok, chunkCreated := series.Append(s.T, s.V) + series.pendingCommit = false + series.Unlock() if !ok { total-- @@ -929,10 +947,12 @@ func (a *headAppender) Commit() error { func (a *headAppender) Rollback() error { a.head.metrics.activeAppenders.Dec() - for _, s := range a.samples { - s.Series.Lock() - s.Series.PendingCommit = false - s.Series.Unlock() + var series *memSeries + for i := range a.samples { + series = a.sampleSeries[i] + series.Lock() + series.pendingCommit = false + series.Unlock() } a.head.putAppendBuffer(a.samples) @@ -1004,7 +1024,7 @@ func (h *Head) chunkRewrite(ref uint64, dranges tombstones.Intervals) (err error ms := h.series.getByID(ref) ms.Lock() defer ms.Unlock() - if len(ms.Chunks) == 0 { + if len(ms.chunks) == 0 { return nil } @@ -1216,7 +1236,7 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { type safeChunk struct { chunkenc.Chunk - s *record.MemSeries + s *memSeries cid int } @@ -1286,7 +1306,7 @@ func (h *headIndexReader) Postings(name, value string) (index.Postings, error) { } func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { - series := make([]*record.MemSeries, 0, 128) + series := make([]*memSeries, 0, 128) // Fetch all the series only once. 
for p.Next() { @@ -1302,13 +1322,13 @@ func (h *headIndexReader) SortedPostings(p index.Postings) index.Postings { } sort.Slice(series, func(i, j int) bool { - return labels.Compare(series[i].Lset, series[j].Lset) < 0 + return labels.Compare(series[i].lset, series[j].lset) < 0 }) // Convert back to list. ep := make([]uint64, 0, len(series)) for _, p := range series { - ep = append(ep, p.Ref) + ep = append(ep, p.ref) } return index.NewListPostings(ep) } @@ -1321,21 +1341,21 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks h.head.metrics.seriesNotFound.Inc() return record.ErrNotFound } - *lbls = append((*lbls)[:0], s.Lset...) + *lbls = append((*lbls)[:0], s.lset...) s.Lock() defer s.Unlock() *chks = (*chks)[:0] - for i, c := range s.Chunks { + for i, c := range s.chunks { // Do not expose chunks that are outside of the specified range. if !c.OverlapsClosedInterval(h.mint, h.maxt) { continue } // Set the head chunks as open (being appended to). maxTime := c.MaxTime - if s.HeadChunk == c { + if s.headChunk == c { maxTime = math.MaxInt64 } @@ -1359,7 +1379,7 @@ func (h *headIndexReader) LabelIndices() ([][]string, error) { return res, nil } -func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*record.MemSeries, bool) { +func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*memSeries, bool) { // Just using `getOrSet` below would be semantically sufficient, but we'd create // a new series on every sample inserted via Add(), which causes allocations // and makes our series IDs rather random and harder to compress in postings. 
@@ -1374,8 +1394,8 @@ func (h *Head) getOrCreate(hash uint64, lset labels.Labels) (*record.MemSeries, return h.getOrCreateWithID(id, hash, lset) } -func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*record.MemSeries, bool) { - s := record.NewMemSeries(lset, id, h.chunkRange) +func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSeries, bool) { + s := newMemSeries(lset, id, h.chunkRange) s, created := h.series.getOrSet(hash, s) if !created { @@ -1409,21 +1429,21 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*record.M // on top of a regular hashmap and holds a slice of series to resolve hash collisions. // Its methods require the hash to be submitted with it to avoid re-computations throughout // the code. -type seriesHashmap map[uint64][]*record.MemSeries +type seriesHashmap map[uint64][]*memSeries -func (m seriesHashmap) get(hash uint64, lset labels.Labels) *record.MemSeries { +func (m seriesHashmap) get(hash uint64, lset labels.Labels) *memSeries { for _, s := range m[hash] { - if s.Lset.Equals(lset) { + if s.lset.Equals(lset) { return s } } return nil } -func (m seriesHashmap) set(hash uint64, s *record.MemSeries) { +func (m seriesHashmap) set(hash uint64, s *memSeries) { l := m[hash] for i, prev := range l { - if prev.Lset.Equals(s.Lset) { + if prev.lset.Equals(s.lset) { l[i] = s return } @@ -1432,9 +1452,9 @@ func (m seriesHashmap) set(hash uint64, s *record.MemSeries) { } func (m seriesHashmap) del(hash uint64, lset labels.Labels) { - var rem []*record.MemSeries + var rem []*memSeries for _, s := range m[hash] { - if !s.Lset.Equals(lset) { + if !s.lset.Equals(lset) { rem = append(rem, s) } } @@ -1450,7 +1470,7 @@ func (m seriesHashmap) del(hash uint64, lset labels.Labels) { // with the maps was profiled to be slower – likely due to the additional pointer // dereferences. 
type stripeSeries struct { - series [stripeSize]map[uint64]*record.MemSeries + series [stripeSize]map[uint64]*memSeries hashes [stripeSize]seriesHashmap locks [stripeSize]stripeLock } @@ -1470,7 +1490,7 @@ func newStripeSeries() *stripeSeries { s := &stripeSeries{} for i := range s.series { - s.series[i] = map[uint64]*record.MemSeries{} + s.series[i] = map[uint64]*memSeries{} } for i := range s.hashes { s.hashes[i] = seriesHashmap{} @@ -1495,7 +1515,7 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { series.Lock() rmChunks += series.TruncateChunksBefore(mint) - if len(series.Chunks) > 0 || series.PendingCommit { + if len(series.chunks) > 0 || series.pendingCommit { series.Unlock() continue } @@ -1505,15 +1525,15 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { // series alike. // If we don't hold them all, there's a very small chance that a series receives // samples again while we are half-way into deleting it. - j := int(series.Ref & stripeMask) + j := int(series.ref & stripeMask) if i != j { s.locks[j].Lock() } - deleted[series.Ref] = struct{}{} - s.hashes[i].del(hash, series.Lset) - delete(s.series[j], series.Ref) + deleted[series.ref] = struct{}{} + s.hashes[i].del(hash, series.lset) + delete(s.series[j], series.ref) if i != j { s.locks[j].Unlock() @@ -1529,7 +1549,7 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { return deleted, rmChunks } -func (s *stripeSeries) getByID(id uint64) *record.MemSeries { +func (s *stripeSeries) getByID(id uint64) *memSeries { i := id & stripeMask s.locks[i].RLock() @@ -1539,7 +1559,7 @@ func (s *stripeSeries) getByID(id uint64) *record.MemSeries { return series } -func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *record.MemSeries { +func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) *memSeries { i := hash & stripeMask s.locks[i].RLock() @@ -1549,27 +1569,285 @@ func (s *stripeSeries) getByHash(hash uint64, lset labels.Labels) 
*record.MemSer return series } -func (s *stripeSeries) getOrSet(hash uint64, series *record.MemSeries) (*record.MemSeries, bool) { +func (s *stripeSeries) getOrSet(hash uint64, series *memSeries) (*memSeries, bool) { i := hash & stripeMask s.locks[i].Lock() - if prev := s.hashes[i].get(hash, series.Lset); prev != nil { + if prev := s.hashes[i].get(hash, series.lset); prev != nil { s.locks[i].Unlock() return prev, false } s.hashes[i].set(hash, series) s.locks[i].Unlock() - i = series.Ref & stripeMask + i = series.ref & stripeMask s.locks[i].Lock() - s.series[i][series.Ref] = series + s.series[i][series.ref] = series s.locks[i].Unlock() return series, true } +type sample struct { + t int64 + v float64 +} + +func (s sample) T() int64 { + return s.t +} + +func (s sample) V() float64 { + return s.v +} + +// memSeries is the in-memory representation of a series. None of its methods +// are goroutine safe and it is the caller's responsibility to lock it. +type memSeries struct { + sync.Mutex + + ref uint64 + chunks []*memChunk + lset labels.Labels + headChunk *memChunk + + chunkRange int64 + firstChunkID int + + nextAt int64 // Timestamp at which to cut the next chunk. + sampleBuf [4]sample + pendingCommit bool // Whether there are samples waiting to be committed to this series. + + app chunkenc.Appender // Current appender for the chunk. 
+} + +func newMemSeries(lset labels.Labels, id uint64, chunkRange int64) *memSeries { + s := &memSeries{ + lset: lset, + ref: id, + chunkRange: chunkRange, + nextAt: math.MinInt64, + } + return s +} + +func (s *memSeries) MinTime() int64 { + if len(s.chunks) == 0 { + return math.MinInt64 + } + return s.chunks[0].MinTime +} + +func (s *memSeries) MaxTime() int64 { + c := s.head() + if c == nil { + return math.MinInt64 + } + return c.MaxTime +} + +func (s *memSeries) cut(mint int64) *memChunk { + c := &memChunk{ + Chunk: chunkenc.NewXORChunk(), + MinTime: mint, + MaxTime: math.MinInt64, + } + s.chunks = append(s.chunks, c) + s.headChunk = c + + // Set upper bound on when the next chunk must be started. An earlier timestamp + // may be chosen dynamically at a later point. + s.nextAt = rangeForTimestamp(mint, s.chunkRange) + + app, err := c.Chunk.Appender() + if err != nil { + panic(err) + } + s.app = app + return c +} + +func (s *memSeries) ChunksMetas() []chunks.Meta { + metas := make([]chunks.Meta, 0, len(s.chunks)) + for _, chk := range s.chunks { + metas = append(metas, chunks.Meta{Chunk: chk.Chunk, MinTime: chk.MinTime, MaxTime: chk.MaxTime}) + } + return metas +} + +// reset re-initialises all the variable in the memSeries except 'lset', 'ref', +// and 'chunkRange', like how it would appear after 'newmemSeries(...)'. +func (s *memSeries) Reset() { + s.chunks = nil + s.headChunk = nil + s.firstChunkID = 0 + s.nextAt = math.MinInt64 + s.sampleBuf = [4]sample{} + s.pendingCommit = false + s.app = nil +} + +// Appendable checks whether the given sample is valid for appending to the series. +func (s *memSeries) Appendable(t int64, v float64) error { + c := s.head() + if c == nil { + return nil + } + + if t > c.MaxTime { + return nil + } + if t < c.MaxTime { + return record.ErrOutOfOrderSample + } + // We are allowing exact duplicates as we can encounter them in valid cases + // like federation and erroring out at that time would be extremely noisy. 
+ if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) { + return record.ErrAmendSample + } + return nil +} + +func (s *memSeries) Chunk(id int) *memChunk { + ix := id - s.firstChunkID + if ix < 0 || ix >= len(s.chunks) { + return nil + } + return s.chunks[ix] +} + +func (s *memSeries) ChunkID(pos int) int { + return pos + s.firstChunkID +} + +// TruncateChunksBefore removes all chunks from the series that have not timestamp +// at or after mint. Chunk IDs remain unchanged. +func (s *memSeries) TruncateChunksBefore(mint int64) (removed int) { + var k int + for i, c := range s.chunks { + if c.MaxTime >= mint { + break + } + k = i + 1 + } + s.chunks = append(s.chunks[:0], s.chunks[k:]...) + s.firstChunkID += k + if len(s.chunks) == 0 { + s.headChunk = nil + } else { + s.headChunk = s.chunks[len(s.chunks)-1] + } + + return k +} + +// Append adds the sample (t, v) to the series. +func (s *memSeries) Append(t int64, v float64) (success, chunkCreated bool) { + // Based on Gorilla white papers this offers near-optimal compression ratio + // so anything bigger that this has diminishing returns and increases + // the time range within which we have to decompress all samples. + const samplesPerChunk = 120 + + c := s.head() + + if c == nil { + c = s.cut(t) + chunkCreated = true + } + numSamples := c.Chunk.NumSamples() + + // Out of order sample. + if c.MaxTime >= t { + return false, chunkCreated + } + // If we reach 25% of a chunk's desired sample count, set a definitive time + // at which to start the next chunk. + // At latest it must happen at the timestamp set when the chunk was cut. 
+ if numSamples == samplesPerChunk/4 { + s.nextAt = computeChunkEndTime(c.MinTime, c.MaxTime, s.nextAt) + } + if t >= s.nextAt { + c = s.cut(t) + chunkCreated = true + } + s.app.Append(t, v) + + c.MaxTime = t + + s.sampleBuf[0] = s.sampleBuf[1] + s.sampleBuf[1] = s.sampleBuf[2] + s.sampleBuf[2] = s.sampleBuf[3] + s.sampleBuf[3] = sample{t: t, v: v} + + return true, chunkCreated +} + +func (s *memSeries) Iterator(id int) chunkenc.Iterator { + c := s.Chunk(id) + // TODO(fabxc): Work around! A querier may have retrieved a pointer to a series' chunk, + // which got then garbage collected before it got accessed. + // We must ensure to not garbage collect as long as any readers still hold a reference. + if c == nil { + return chunkenc.NewNopIterator() + } + + if id-s.firstChunkID < len(s.chunks)-1 { + return c.Chunk.Iterator() + } + // Serve the last 4 samples for the last chunk from the sample buffer + // as their compressed bytes may be mutated by added samples. + it := &memSafeIterator{ + Iterator: c.Chunk.Iterator(), + i: -1, + total: c.Chunk.NumSamples(), + buf: s.sampleBuf, + } + return it +} + +func (s *memSeries) head() *memChunk { + return s.headChunk +} + +type memChunk struct { + Chunk chunkenc.Chunk + MinTime, MaxTime int64 +} + +// Returns true if the chunk overlaps [mint, maxt]. 
+func (mc *memChunk) OverlapsClosedInterval(mint, maxt int64) bool { + return mc.MinTime <= maxt && mint <= mc.MaxTime +} + +type memSafeIterator struct { + chunkenc.Iterator + + i int + total int + buf [4]sample +} + +func (it *memSafeIterator) Next() bool { + if it.i+1 >= it.total { + return false + } + it.i++ + if it.total-it.i > 4 { + return it.Iterator.Next() + } + return true +} + +func (it *memSafeIterator) At() (int64, float64) { + if it.total-it.i > 4 { + return it.Iterator.At() + } + s := it.buf[4-(it.total-it.i)] + return s.t, s.v +} + // computeChunkEndTime estimates the end timestamp based the beginning of a chunk, // its current timestamp and the upper bound up to which we insert data. // It assumes that the time range is 1/4 full. diff --git a/head_test.go b/head_test.go index 7b4caded..c5e4d358 100644 --- a/head_test.go +++ b/head_test.go @@ -148,10 +148,10 @@ func TestHead_ReadWAL(t *testing.T) { s50 := head.series.getByID(50) s100 := head.series.getByID(100) - testutil.Equals(t, labels.FromStrings("a", "1"), s10.Lset) - testutil.Equals(t, (*record.MemSeries)(nil), s11) // Series without samples should be garbage colected at head.Init(). - testutil.Equals(t, labels.FromStrings("a", "4"), s50.Lset) - testutil.Equals(t, labels.FromStrings("a", "3"), s100.Lset) + testutil.Equals(t, labels.FromStrings("a", "1"), s10.lset) + testutil.Equals(t, (*memSeries)(nil), s11) // Series without samples should be garbage colected at head.Init(). 
+ testutil.Equals(t, labels.FromStrings("a", "4"), s50.lset) + testutil.Equals(t, labels.FromStrings("a", "3"), s100.lset) expandChunk := func(c chunkenc.Iterator) (x []sample) { for c.Next() { @@ -225,38 +225,38 @@ func TestHead_Truncate(t *testing.T) { s3, _ := h.getOrCreate(3, labels.FromStrings("a", "1", "b", "2")) s4, _ := h.getOrCreate(4, labels.FromStrings("a", "2", "b", "2", "c", "1")) - s1.Chunks = []*record.MemChunk{ + s1.chunks = []*memChunk{ {MinTime: 0, MaxTime: 999}, {MinTime: 1000, MaxTime: 1999}, {MinTime: 2000, MaxTime: 2999}, } - s2.Chunks = []*record.MemChunk{ + s2.chunks = []*memChunk{ {MinTime: 1000, MaxTime: 1999}, {MinTime: 2000, MaxTime: 2999}, {MinTime: 3000, MaxTime: 3999}, } - s3.Chunks = []*record.MemChunk{ + s3.chunks = []*memChunk{ {MinTime: 0, MaxTime: 999}, {MinTime: 1000, MaxTime: 1999}, } - s4.Chunks = []*record.MemChunk{} + s4.chunks = []*memChunk{} // Truncation need not be aligned. testutil.Ok(t, h.Truncate(1)) testutil.Ok(t, h.Truncate(2000)) - testutil.Equals(t, []*record.MemChunk{ + testutil.Equals(t, []*memChunk{ {MinTime: 2000, MaxTime: 2999}, - }, h.series.getByID(s1.Ref).Chunks) + }, h.series.getByID(s1.ref).chunks) - testutil.Equals(t, []*record.MemChunk{ + testutil.Equals(t, []*memChunk{ {MinTime: 2000, MaxTime: 2999}, {MinTime: 3000, MaxTime: 3999}, - }, h.series.getByID(s2.Ref).Chunks) + }, h.series.getByID(s2.ref).chunks) - testutil.Assert(t, h.series.getByID(s3.Ref) == nil, "") - testutil.Assert(t, h.series.getByID(s4.Ref) == nil, "") + testutil.Assert(t, h.series.getByID(s3.ref) == nil, "") + testutil.Assert(t, h.series.getByID(s4.ref) == nil, "") postingsA1, _ := index.ExpandPostings(h.postings.Get("a", "1")) postingsA2, _ := index.ExpandPostings(h.postings.Get("a", "2")) @@ -265,10 +265,10 @@ func TestHead_Truncate(t *testing.T) { postingsC1, _ := index.ExpandPostings(h.postings.Get("c", "1")) postingsAll, _ := index.ExpandPostings(h.postings.Get("", "")) - testutil.Equals(t, []uint64{s1.Ref}, postingsA1) - 
testutil.Equals(t, []uint64{s2.Ref}, postingsA2) - testutil.Equals(t, []uint64{s1.Ref, s2.Ref}, postingsB1) - testutil.Equals(t, []uint64{s1.Ref, s2.Ref}, postingsAll) + testutil.Equals(t, []uint64{s1.ref}, postingsA1) + testutil.Equals(t, []uint64{s2.ref}, postingsA2) + testutil.Equals(t, []uint64{s1.ref, s2.ref}, postingsB1) + testutil.Equals(t, []uint64{s1.ref, s2.ref}, postingsAll) testutil.Assert(t, postingsB2 == nil, "") testutil.Assert(t, postingsC1 == nil, "") @@ -290,7 +290,7 @@ func TestHead_Truncate(t *testing.T) { // Validate various behaviors brought on by firstChunkID accounting for // garbage collected chunks. func TestMemSeries_truncateChunks(t *testing.T) { - s := record.NewMemSeries(labels.FromStrings("a", "b"), 1, 2000) + s := newMemSeries(labels.FromStrings("a", "b"), 1, 2000) for i := 0; i < 4000; i += 5 { ok, _ := s.Append(int64(i), float64(i)) @@ -299,7 +299,7 @@ func TestMemSeries_truncateChunks(t *testing.T) { // Check that truncate removes half of the chunks and afterwards // that the ID of the last chunk still gives us the same chunk afterwards. 
- countBefore := len(s.Chunks) + countBefore := len(s.chunks) lastID := s.ChunkID(countBefore - 1) lastChunk := s.Chunk(lastID) @@ -308,9 +308,9 @@ func TestMemSeries_truncateChunks(t *testing.T) { s.TruncateChunksBefore(2000) - testutil.Equals(t, int64(2000), s.Chunks[0].MinTime) + testutil.Equals(t, int64(2000), s.chunks[0].MinTime) testutil.Assert(t, s.Chunk(0) == nil, "first chunks not gone") - testutil.Equals(t, countBefore/2, len(s.Chunks)) + testutil.Equals(t, countBefore/2, len(s.chunks)) testutil.Equals(t, lastChunk, s.Chunk(lastID)) // Validate that the series' sample buffer is applied correctly to the last chunk @@ -854,7 +854,7 @@ func TestComputeChunkEndTime(t *testing.T) { } func TestMemSeries_append(t *testing.T) { - s := record.NewMemSeries(labels.Labels{}, 1, 500) + s := newMemSeries(labels.Labels{}, 1, 500) // Add first two samples at the very end of a chunk range and the next two // on and after it. @@ -875,8 +875,8 @@ func TestMemSeries_append(t *testing.T) { testutil.Assert(t, ok, "append failed") testutil.Assert(t, !chunkCreated, "second sample should use same chunk") - testutil.Assert(t, s.Chunks[0].MinTime == 998 && s.Chunks[0].MaxTime == 999, "wrong chunk range") - testutil.Assert(t, s.Chunks[1].MinTime == 1000 && s.Chunks[1].MaxTime == 1001, "wrong chunk range") + testutil.Assert(t, s.chunks[0].MinTime == 998 && s.chunks[0].MaxTime == 999, "wrong chunk range") + testutil.Assert(t, s.chunks[1].MinTime == 1000 && s.chunks[1].MaxTime == 1001, "wrong chunk range") // Fill the range [1000,2000) with many samples. Intermediate chunks should be cut // at approximately 120 samples per chunk. @@ -885,10 +885,10 @@ func TestMemSeries_append(t *testing.T) { testutil.Assert(t, ok, "append failed") } - testutil.Assert(t, len(s.Chunks) > 7, "expected intermediate chunks") + testutil.Assert(t, len(s.chunks) > 7, "expected intermediate chunks") // All chunks but the first and last should now be moderately full. 
- for i, c := range s.Chunks[1 : len(s.Chunks)-1] { + for i, c := range s.chunks[1 : len(s.chunks)-1] { testutil.Assert(t, c.Chunk.NumSamples() > 100, "unexpected small chunk %d of length %d", i, c.Chunk.NumSamples()) } } @@ -902,7 +902,7 @@ func TestGCChunkAccess(t *testing.T) { h.initTime(0) s, _ := h.getOrCreate(1, labels.FromStrings("a", "1")) - s.Chunks = []*record.MemChunk{ + s.chunks = []*memChunk{ {MinTime: 0, MaxTime: 999}, {MinTime: 1000, MaxTime: 1999}, } @@ -942,7 +942,7 @@ func TestGCSeriesAccess(t *testing.T) { h.initTime(0) s, _ := h.getOrCreate(1, labels.FromStrings("a", "1")) - s.Chunks = []*record.MemChunk{ + s.chunks = []*memChunk{ {MinTime: 0, MaxTime: 999}, {MinTime: 1000, MaxTime: 1999}, } @@ -967,7 +967,7 @@ func TestGCSeriesAccess(t *testing.T) { testutil.Ok(t, h.Truncate(2000)) // Remove the series. - testutil.Equals(t, (*record.MemSeries)(nil), h.series.getByID(1)) + testutil.Equals(t, (*memSeries)(nil), h.series.getByID(1)) _, err = cr.Chunk(chunks[0].Ref) testutil.Equals(t, record.ErrNotFound, err) @@ -1030,7 +1030,7 @@ func TestRemoveSeriesAfterRollbackAndTruncate(t *testing.T) { // Truncate again, this time the series should be deleted testutil.Ok(t, h.Truncate(2050)) - testutil.Equals(t, (*record.MemSeries)(nil), h.series.getByHash(lset.Hash(), lset)) + testutil.Equals(t, (*memSeries)(nil), h.series.getByHash(lset.Hash(), lset)) } func TestHead_LogRollback(t *testing.T) { diff --git a/querier_test.go b/querier_test.go index b24ca131..7cf69078 100644 --- a/querier_test.go +++ b/querier_test.go @@ -190,19 +190,6 @@ func expandSeriesIterator(it SeriesIterator) (r []tsdbutil.Sample, err error) { return r, it.Err() } -type sample struct { - t int64 - v float64 -} - -func (s sample) T() int64 { - return s.t -} - -func (s sample) V() float64 { - return s.v -} - type seriesSamples struct { lset map[string]string chunks [][]sample diff --git a/record/internal.go b/record/internal.go index 89a0cc2d..bd7af165 100644 --- a/record/internal.go +++ 
b/record/internal.go @@ -14,13 +14,9 @@ package record import ( "errors" - "math" "os" "path/filepath" - "sync" - "github.com/prometheus/tsdb/chunkenc" - "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" ) @@ -38,19 +34,6 @@ var ( ErrAmendSample = errors.New("amending sample") ) -type sample struct { - t int64 - v float64 -} - -func (s sample) T() int64 { - return s.t -} - -func (s sample) V() float64 { - return s.v -} - // RefSeries is the series labels with the series ID. type RefSeries struct { Ref uint64 @@ -59,255 +42,9 @@ type RefSeries struct { // RefSample is a timestamp/value pair associated with a reference to a series. type RefSample struct { - Ref uint64 - T int64 - V float64 - Series *MemSeries -} - -// MemSeries is the in-memory representation of a series. None of its methods -// are goroutine safe and it is the caller's responsibility to lock it. -type MemSeries struct { - sync.Mutex - - Ref uint64 - PendingCommit bool // Whether there are samples waiting to be committed to this series. - Chunks []*MemChunk - Lset labels.Labels - HeadChunk *MemChunk - - chunkRange int64 - firstChunkID int - - nextAt int64 // Timestamp at which to cut the next chunk. - sampleBuf [4]sample - - app chunkenc.Appender // Current appender for the chunk. 
-} - -func NewMemSeries(lset labels.Labels, id uint64, chunkRange int64) *MemSeries { - s := &MemSeries{ - Lset: lset, - Ref: id, - chunkRange: chunkRange, - nextAt: math.MinInt64, - } - return s -} - -func (s *MemSeries) MinTime() int64 { - if len(s.Chunks) == 0 { - return math.MinInt64 - } - return s.Chunks[0].MinTime -} - -func (s *MemSeries) MaxTime() int64 { - c := s.head() - if c == nil { - return math.MinInt64 - } - return c.MaxTime -} - -func (s *MemSeries) cut(mint int64) *MemChunk { - c := &MemChunk{ - Chunk: chunkenc.NewXORChunk(), - MinTime: mint, - MaxTime: math.MinInt64, - } - s.Chunks = append(s.Chunks, c) - s.HeadChunk = c - - // Set upper bound on when the next chunk must be started. An earlier timestamp - // may be chosen dynamically at a later point. - s.nextAt = rangeForTimestamp(mint, s.chunkRange) - - app, err := c.Chunk.Appender() - if err != nil { - panic(err) - } - s.app = app - return c -} - -func (s *MemSeries) ChunksMetas() []chunks.Meta { - metas := make([]chunks.Meta, 0, len(s.Chunks)) - for _, chk := range s.Chunks { - metas = append(metas, chunks.Meta{Chunk: chk.Chunk, MinTime: chk.MinTime, MaxTime: chk.MaxTime}) - } - return metas -} - -// reset re-initialises all the variable in the MemSeries except 'lset', 'ref', -// and 'chunkRange', like how it would appear after 'newMemSeries(...)'. -func (s *MemSeries) Reset() { - s.Chunks = nil - s.HeadChunk = nil - s.firstChunkID = 0 - s.nextAt = math.MinInt64 - s.sampleBuf = [4]sample{} - s.PendingCommit = false - s.app = nil -} - -// Appendable checks whether the given sample is valid for appending to the series. -func (s *MemSeries) Appendable(t int64, v float64) error { - c := s.head() - if c == nil { - return nil - } - - if t > c.MaxTime { - return nil - } - if t < c.MaxTime { - return ErrOutOfOrderSample - } - // We are allowing exact duplicates as we can encounter them in valid cases - // like federation and erroring out at that time would be extremely noisy. 
- if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) { - return ErrAmendSample - } - return nil -} - -func (s *MemSeries) Chunk(id int) *MemChunk { - ix := id - s.firstChunkID - if ix < 0 || ix >= len(s.Chunks) { - return nil - } - return s.Chunks[ix] -} - -func (s *MemSeries) ChunkID(pos int) int { - return pos + s.firstChunkID -} - -// TruncateChunksBefore removes all chunks from the series that have not timestamp -// at or after mint. Chunk IDs remain unchanged. -func (s *MemSeries) TruncateChunksBefore(mint int64) (removed int) { - var k int - for i, c := range s.Chunks { - if c.MaxTime >= mint { - break - } - k = i + 1 - } - s.Chunks = append(s.Chunks[:0], s.Chunks[k:]...) - s.firstChunkID += k - if len(s.Chunks) == 0 { - s.HeadChunk = nil - } else { - s.HeadChunk = s.Chunks[len(s.Chunks)-1] - } - - return k -} - -// Append adds the sample (t, v) to the series. -func (s *MemSeries) Append(t int64, v float64) (success, chunkCreated bool) { - // Based on Gorilla white papers this offers near-optimal compression ratio - // so anything bigger that this has diminishing returns and increases - // the time range within which we have to decompress all samples. - const samplesPerChunk = 120 - - c := s.head() - - if c == nil { - c = s.cut(t) - chunkCreated = true - } - numSamples := c.Chunk.NumSamples() - - // Out of order sample. - if c.MaxTime >= t { - return false, chunkCreated - } - // If we reach 25% of a chunk's desired sample count, set a definitive time - // at which to start the next chunk. - // At latest it must happen at the timestamp set when the chunk was cut. 
- if numSamples == samplesPerChunk/4 { - s.nextAt = computeChunkEndTime(c.MinTime, c.MaxTime, s.nextAt) - } - if t >= s.nextAt { - c = s.cut(t) - chunkCreated = true - } - s.app.Append(t, v) - - c.MaxTime = t - - s.sampleBuf[0] = s.sampleBuf[1] - s.sampleBuf[1] = s.sampleBuf[2] - s.sampleBuf[2] = s.sampleBuf[3] - s.sampleBuf[3] = sample{t: t, v: v} - - return true, chunkCreated -} - -func (s *MemSeries) Iterator(id int) chunkenc.Iterator { - c := s.Chunk(id) - // TODO(fabxc): Work around! A querier may have retrieved a pointer to a series' chunk, - // which got then garbage collected before it got accessed. - // We must ensure to not garbage collect as long as any readers still hold a reference. - if c == nil { - return chunkenc.NewNopIterator() - } - - if id-s.firstChunkID < len(s.Chunks)-1 { - return c.Chunk.Iterator() - } - // Serve the last 4 samples for the last chunk from the sample buffer - // as their compressed bytes may be mutated by added samples. - it := &MemSafeIterator{ - Iterator: c.Chunk.Iterator(), - i: -1, - total: c.Chunk.NumSamples(), - buf: s.sampleBuf, - } - return it -} - -func (s *MemSeries) head() *MemChunk { - return s.HeadChunk -} - -type MemChunk struct { - Chunk chunkenc.Chunk - MinTime, MaxTime int64 -} - -// Returns true if the chunk overlaps [mint, maxt]. 
-func (mc *MemChunk) OverlapsClosedInterval(mint, maxt int64) bool { - return mc.MinTime <= maxt && mint <= mc.MaxTime -} - -type MemSafeIterator struct { - chunkenc.Iterator - - i int - total int - buf [4]sample -} - -func (it *MemSafeIterator) Next() bool { - if it.i+1 >= it.total { - return false - } - it.i++ - if it.total-it.i > 4 { - return it.Iterator.Next() - } - return true -} - -func (it *MemSafeIterator) At() (int64, float64) { - if it.total-it.i > 4 { - return it.Iterator.At() - } - s := it.buf[4-(it.total-it.i)] - return s.t, s.v + Ref uint64 + T int64 + V float64 } func rangeForTimestamp(t int64, width int64) (maxt int64) { From 1a4aba41326ccc8af8a7aff11acf3184dc86c1e6 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 3 Jul 2019 20:52:27 -0700 Subject: [PATCH 08/16] We can just duplicate ErrNotFound to the record package. Signed-off-by: Callum Styan --- cmd/tsdb/main.go | 3 +-- db_test.go | 6 +++--- head.go | 27 ++++++++++++++++++++------- head_test.go | 9 ++++----- querier.go | 5 ++--- querier_test.go | 3 +-- record/internal.go | 10 +--------- 7 files changed, 32 insertions(+), 31 deletions(-) diff --git a/cmd/tsdb/main.go b/cmd/tsdb/main.go index 829891ef..e3dc530a 100644 --- a/cmd/tsdb/main.go +++ b/cmd/tsdb/main.go @@ -36,7 +36,6 @@ import ( "github.com/prometheus/tsdb/chunks" tsdb_errors "github.com/prometheus/tsdb/errors" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" "gopkg.in/alecthomas/kingpin.v2" ) @@ -307,7 +306,7 @@ func (b *writeBenchmark) ingestScrapesShard(lbls []labels.Labels, scrapeCount in s.ref = &ref } else if err := app.AddFast(*s.ref, ts, float64(s.value)); err != nil { - if errors.Cause(err) != record.ErrNotFound { + if errors.Cause(err) != tsdb.ErrNotFound { panic(err) } diff --git a/db_test.go b/db_test.go index 66a44661..8d5d3512 100644 --- a/db_test.go +++ b/db_test.go @@ -198,7 +198,7 @@ func TestDBAppenderAddRef(t *testing.T) { testutil.Ok(t, err) err = app2.AddFast(9999999, 1, 1) - 
testutil.Equals(t, record.ErrNotFound, errors.Cause(err)) + testutil.Equals(t, ErrNotFound, errors.Cause(err)) testutil.Ok(t, app2.Commit()) @@ -361,7 +361,7 @@ func TestAmendDatapointCausesError(t *testing.T) { app = db.Appender() _, err = app.Add(labels.Labels{}, 0, 1) - testutil.Equals(t, record.ErrAmendSample, err) + testutil.Equals(t, ErrAmendSample, err) testutil.Ok(t, app.Rollback()) } @@ -395,7 +395,7 @@ func TestNonDuplicateNaNDatapointsCausesAmendError(t *testing.T) { app = db.Appender() _, err = app.Add(labels.Labels{}, 0, math.Float64frombits(0x7ff0000000000002)) - testutil.Equals(t, record.ErrAmendSample, err) + testutil.Equals(t, ErrAmendSample, err) } func TestSkippingInvalidValuesInSameTxn(t *testing.T) { diff --git a/head.go b/head.go index fa385ca6..fe4ff388 100644 --- a/head.go +++ b/head.go @@ -39,10 +39,21 @@ import ( ) var ( + // ErrNotFound is returned if a looked up resource was not found. + ErrNotFound = errors.Errorf("not found") + + // ErrOutOfOrderSample is returned if an appended sample has a + // timestamp smaller than the most recent sample. + ErrOutOfOrderSample = errors.New("out of order sample") + // ErrOutOfBounds is returned if an appended sample is out of the // writable time range. ErrOutOfBounds = errors.New("out of bounds") + // ErrAmendSample is returned if an appended sample has the same timestamp + // as the most recent sample but a different value. + ErrAmendSample = errors.New("amending sample") + // emptyTombstoneReader is a no-op Tombstone Reader. // This is used by head to satisfy the Tombstones() function call. emptyTombstoneReader = tombstones.NewMemTombstones() @@ -501,6 +512,8 @@ func (h *Head) Init(minValidTime int64) error { level.Info(h.logger).Log("msg", "replaying WAL, this may take awhile") // Backfill the checkpoint first if it exists. 
dir, startFrom, err := wal.LastCheckpoint(h.wal.Dir()) + // We need to compare err to record.ErrNotFound as that's what + // wal.LastCheckpoint would return, not tsdb.ErrNotFound. if err != nil && err != record.ErrNotFound { return errors.Wrap(err, "find last checkpoint") } @@ -731,7 +744,7 @@ func (a *initAppender) Add(lset labels.Labels, t int64, v float64) (uint64, erro func (a *initAppender) AddFast(ref uint64, t int64, v float64) error { if a.app == nil { - return record.ErrNotFound + return ErrNotFound } return a.app.AddFast(ref, t, v) } @@ -856,7 +869,7 @@ func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { s := a.head.series.getByID(ref) if s == nil { - return errors.Wrap(record.ErrNotFound, "unknown series") + return errors.Wrap(ErrNotFound, "unknown series") } s.Lock() if err := s.Appendable(t, v); err != nil { @@ -1213,7 +1226,7 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { s := h.head.series.getByID(sid) // This means that the series has been garbage collected. if s == nil { - return nil, record.ErrNotFound + return nil, ErrNotFound } s.Lock() @@ -1223,7 +1236,7 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { // the specified range. if c == nil || !c.OverlapsClosedInterval(h.mint, h.maxt) { s.Unlock() - return nil, record.ErrNotFound + return nil, ErrNotFound } s.Unlock() @@ -1339,7 +1352,7 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks if s == nil { h.head.metrics.seriesNotFound.Inc() - return record.ErrNotFound + return ErrNotFound } *lbls = append((*lbls)[:0], s.lset...) @@ -1700,12 +1713,12 @@ func (s *memSeries) Appendable(t int64, v float64) error { return nil } if t < c.MaxTime { - return record.ErrOutOfOrderSample + return ErrOutOfOrderSample } // We are allowing exact duplicates as we can encounter them in valid cases // like federation and erroring out at that time would be extremely noisy. 
if math.Float64bits(s.sampleBuf[3].v) != math.Float64bits(v) { - return record.ErrAmendSample + return ErrAmendSample } return nil } diff --git a/head_test.go b/head_test.go index c5e4d358..1a1a95b5 100644 --- a/head_test.go +++ b/head_test.go @@ -928,7 +928,7 @@ func TestGCChunkAccess(t *testing.T) { testutil.Ok(t, h.Truncate(1500)) // Remove a chunk. _, err = cr.Chunk(chunks[0].Ref) - testutil.Equals(t, record.ErrNotFound, err) + testutil.Equals(t, ErrNotFound, err) _, err = cr.Chunk(chunks[1].Ref) testutil.Ok(t, err) } @@ -970,9 +970,9 @@ func TestGCSeriesAccess(t *testing.T) { testutil.Equals(t, (*memSeries)(nil), h.series.getByID(1)) _, err = cr.Chunk(chunks[0].Ref) - testutil.Equals(t, record.ErrNotFound, err) + testutil.Equals(t, ErrNotFound, err) _, err = cr.Chunk(chunks[1].Ref) - testutil.Equals(t, record.ErrNotFound, err) + testutil.Equals(t, ErrNotFound, err) } func TestUncommittedSamplesNotLostOnTruncate(t *testing.T) { @@ -1137,10 +1137,9 @@ func TestWalRepair_DecodingError(t *testing.T) { testutil.Ok(t, err) testutil.Equals(t, 0.0, prom_testutil.ToFloat64(h.metrics.walCorruptionsTotal)) initErr := h.Init(math.MinInt64) - err = errors.Cause(initErr) // So that we can pick up errors even if wrapped. _, corrErr := err.(*wal.CorruptionErr) - testutil.Assert(t, corrErr, "reading the wal didn't return corruption error") + testutil.Assert(t, corrErr, fmt.Sprintf("reading the wal didn't return corruption error: %s", err)) testutil.Ok(t, w.Close()) } diff --git a/querier.go b/querier.go index 0e905783..d444f064 100644 --- a/querier.go +++ b/querier.go @@ -25,7 +25,6 @@ import ( tsdb_errors "github.com/prometheus/tsdb/errors" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/tombstones" ) @@ -723,7 +722,7 @@ func (s *baseChunkSeries) Next() bool { ref := s.p.At() if err := s.index.Series(ref, &lset, &chkMetas); err != nil { // Postings may be stale. 
Skip if no underlying series exists. - if errors.Cause(err) == record.ErrNotFound { + if errors.Cause(err) == ErrNotFound { continue } s.err = err @@ -803,7 +802,7 @@ func (s *populatedChunkSeries) Next() bool { c.Chunk, s.err = s.chunks.Chunk(c.Ref) if s.err != nil { // This means that the chunk has be garbage collected. Remove it from the list. - if s.err == record.ErrNotFound { + if s.err == ErrNotFound { s.err = nil // Delete in-place. s.chks = append(chks[:j], chks[j+1:]...) diff --git a/querier_test.go b/querier_test.go index 7cf69078..2794e0e6 100644 --- a/querier_test.go +++ b/querier_test.go @@ -29,7 +29,6 @@ import ( "github.com/prometheus/tsdb/chunks" "github.com/prometheus/tsdb/index" "github.com/prometheus/tsdb/labels" - "github.com/prometheus/tsdb/record" "github.com/prometheus/tsdb/testutil" "github.com/prometheus/tsdb/tombstones" "github.com/prometheus/tsdb/tsdbutil" @@ -1405,7 +1404,7 @@ func (m mockIndex) SortedPostings(p index.Postings) index.Postings { func (m mockIndex) Series(ref uint64, lset *labels.Labels, chks *[]chunks.Meta) error { s, ok := m.series[ref] if !ok { - return record.ErrNotFound + return ErrNotFound } *lset = append((*lset)[:0], s.l...) *chks = append((*chks)[:0], s.chunks...) diff --git a/record/internal.go b/record/internal.go index bd7af165..ba90c682 100644 --- a/record/internal.go +++ b/record/internal.go @@ -22,16 +22,8 @@ import ( ) var ( - // ErrOutOfOrderSample is returned if an appended sample has a - // timestamp smaller than the most recent sample. - ErrOutOfOrderSample = errors.New("out of order sample") - - // ErrNotFound is returned if a looked up resource was not found. + // ErrNotFound is returned if a looked up resource was not found. Duplicate ErrNotFound from head.go. ErrNotFound = errors.New("not found") - - // ErrAmendSample is returned if an appended sample has the same timestamp - // as the most recent sample but a different value. 
- ErrAmendSample = errors.New("amending sample") ) // RefSeries is the series labels with the series ID. From fd51852c14d9de089729ef29ff01c2cc55255be2 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Thu, 4 Jul 2019 08:43:53 -0700 Subject: [PATCH 09/16] These functions aren't used anywhere within the record package anymore after refactoring. Signed-off-by: Callum Styan --- record/internal.go | 40 ---------------------------------------- 1 file changed, 40 deletions(-) diff --git a/record/internal.go b/record/internal.go index ba90c682..bec2de49 100644 --- a/record/internal.go +++ b/record/internal.go @@ -14,10 +14,7 @@ package record import ( "errors" - "os" - "path/filepath" - "github.com/prometheus/tsdb/fileutil" "github.com/prometheus/tsdb/labels" ) @@ -38,40 +35,3 @@ type RefSample struct { T int64 V float64 } - -func rangeForTimestamp(t int64, width int64) (maxt int64) { - return (t/width)*width + width -} - -// computeChunkEndTime estimates the end timestamp based the beginning of a chunk, -// its current timestamp and the upper bound up to which we insert data. -// It assumes that the time range is 1/4 full. -func computeChunkEndTime(start, cur, max int64) int64 { - a := (max - start) / ((cur - start + 1) * 4) - if a == 0 { - return max - } - return start + (max-start)/a -} - -// RenameFile renames the file from, removing to if it already exists before doing the rename. -func RenameFile(from, to string) error { - if err := os.RemoveAll(to); err != nil { - return err - } - if err := os.Rename(from, to); err != nil { - return err - } - - // Directory was renamed; sync parent dir to persist rename. 
- pdir, err := fileutil.OpenDir(filepath.Dir(to)) - if err != nil { - return err - } - - if err = pdir.Sync(); err != nil { - pdir.Close() - return err - } - return pdir.Close() -} From 323b5c416bd5100b9604f919e9e4c952fbbfb8ac Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Thu, 4 Jul 2019 09:47:27 -0700 Subject: [PATCH 10/16] Change type names to remove record.Record... stutter. Signed-off-by: Callum Styan --- db_test.go | 4 +-- head.go | 12 ++++---- head_test.go | 14 ++++----- record/record.go | 64 ++++++++++++++++++++--------------------- record/record_test.go | 8 +++--- wal.go | 2 +- wal/checkpoint.go | 10 +++---- wal/checkpoint_test.go | 8 +++--- wal/wal_watcher.go | 62 +++++++++++++++++++-------------------- wal/wal_watcher_test.go | 24 ++++++++-------- wal_test.go | 10 +++---- 11 files changed, 109 insertions(+), 109 deletions(-) diff --git a/db_test.go b/db_test.go index 8d5d3512..dd977bc0 100644 --- a/db_test.go +++ b/db_test.go @@ -1472,7 +1472,7 @@ func TestInitializeHeadTimestamp(t *testing.T) { w, err := wal.New(nil, nil, path.Join(dir, "wal"), false) testutil.Ok(t, err) - var enc record.RecordEncoder + var enc record.Encoder err = w.Log( enc.Series([]record.RefSeries{ {Ref: 123, Labels: labels.FromStrings("a", "1")}, @@ -1522,7 +1522,7 @@ func TestInitializeHeadTimestamp(t *testing.T) { w, err := wal.New(nil, nil, path.Join(dir, "wal"), false) testutil.Ok(t, err) - var enc record.RecordEncoder + var enc record.Encoder err = w.Log( enc.Series([]record.RefSeries{ {Ref: 123, Labels: labels.FromStrings("a", "1")}, diff --git a/head.go b/head.go index fe4ff388..26c45402 100644 --- a/head.go +++ b/head.go @@ -363,7 +363,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { } var ( - dec record.RecordDecoder + dec record.Decoder series []record.RefSeries samples []record.RefSample tstones []tombstones.Stone @@ -379,7 +379,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { rec := r.Record() 
switch dec.Type(rec) { - case record.RecordSeries: + case record.Series: series, err = dec.Series(rec, series) if err != nil { return &wal.CorruptionErr{ @@ -402,7 +402,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { h.lastSeriesID = s.Ref } } - case record.RecordSamples: + case record.Samples: samples, err = dec.Samples(rec, samples) s := samples if err != nil { @@ -443,7 +443,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { samples = samples[m:] } samples = s // Keep whole slice for reuse. - case record.RecordTombstones: + case record.Tombstones: tstones, err = dec.Tombstones(rec, tstones) if err != nil { return &wal.CorruptionErr{ @@ -904,7 +904,7 @@ func (a *headAppender) log() error { defer func() { a.head.putBytesBuffer(buf) }() var rec []byte - var enc record.RecordEncoder + var enc record.Encoder if len(a.series) > 0 { rec = enc.Series(a.series, buf) @@ -1010,7 +1010,7 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { if p.Err() != nil { return p.Err() } - var enc record.RecordEncoder + var enc record.Encoder if h.wal != nil { // Although we don't store the stones in the head // we need to write them to the WAL to mark these as deleted diff --git a/head_test.go b/head_test.go index 1a1a95b5..c3a2da71 100644 --- a/head_test.go +++ b/head_test.go @@ -53,7 +53,7 @@ func BenchmarkCreateSeries(b *testing.B) { } func populateTestWAL(t testing.TB, w *wal.WAL, recs []interface{}) { - var enc record.RecordEncoder + var enc record.Encoder for _, r := range recs { switch v := r.(type) { case []record.RefSeries: @@ -71,22 +71,22 @@ func readTestWAL(t testing.TB, dir string) (recs []interface{}) { testutil.Ok(t, err) defer sr.Close() - var dec record.RecordDecoder + var dec record.Decoder r := wal.NewReader(sr) for r.Next() { rec := r.Record() switch dec.Type(rec) { - case record.RecordSeries: + case record.Series: series, err := dec.Series(rec, nil) testutil.Ok(t, err) recs 
= append(recs, series) - case record.RecordSamples: + case record.Samples: samples, err := dec.Samples(rec, nil) testutil.Ok(t, err) recs = append(recs, samples) - case record.RecordTombstones: + case record.Tombstones: tstones, err := dec.Tombstones(rec, nil) testutil.Ok(t, err) recs = append(recs, tstones) @@ -1067,7 +1067,7 @@ func TestHead_LogRollback(t *testing.T) { // TestWalRepair_DecodingError ensures that a repair is run for an error // when decoding a record. func TestWalRepair_DecodingError(t *testing.T) { - var enc record.RecordEncoder + var enc record.Encoder for name, test := range map[string]struct { corrFunc func(rec []byte) []byte // Func that applies the corruption to a record. rec []byte @@ -1079,7 +1079,7 @@ func TestWalRepair_DecodingError(t *testing.T) { // Do not modify the base record because it is Logged multiple times. res := make([]byte, len(rec)) copy(res, rec) - res[0] = byte(record.RecordInvalid) + res[0] = byte(record.Invalid) return res }, enc.Series([]record.RefSeries{{Ref: 1, Labels: labels.FromStrings("a", "b")}}, []byte{}), diff --git a/record/record.go b/record/record.go index cf854b1d..5ab75111 100644 --- a/record/record.go +++ b/record/record.go @@ -24,43 +24,43 @@ import ( "github.com/prometheus/tsdb/tombstones" ) -// RecordType represents the data type of a record. -type RecordType uint8 +// Type represents the data type of a record. +type Type uint8 const ( - // RecordInvalid is returned for unrecognised WAL record types. - RecordInvalid RecordType = 255 - // RecordSeries is used to match WAL records of type Series. - RecordSeries RecordType = 1 - // RecordSamples is used to match WAL records of type Samples. - RecordSamples RecordType = 2 - // RecordTombstones is used to match WAL records of type Tombstones. - RecordTombstones RecordType = 3 + // Invalid is returned for unrecognised WAL record types. + Invalid Type = 255 + // Series is used to match WAL records of type Series. 
+ Series Type = 1 + // Samples is used to match WAL records of type Samples. + Samples Type = 2 + // Tombstones is used to match WAL records of type Tombstones. + Tombstones Type = 3 ) -// RecordDecoder decodes series, sample, and tombstone records. +// Decoder decodes series, sample, and tombstone records. // The zero value is ready to use. -type RecordDecoder struct { +type Decoder struct { } // Type returns the type of the record. -// Return RecordInvalid if no valid record type is found. -func (d *RecordDecoder) Type(rec []byte) RecordType { +// Return Invalid if no valid record type is found. +func (d *Decoder) Type(rec []byte) Type { if len(rec) < 1 { - return RecordInvalid + return Invalid } - switch t := RecordType(rec[0]); t { - case RecordSeries, RecordSamples, RecordTombstones: + switch t := Type(rec[0]); t { + case Series, Samples, Tombstones: return t } - return RecordInvalid + return Invalid } // Series appends series in rec to the given slice. -func (d *RecordDecoder) Series(rec []byte, series []RefSeries) ([]RefSeries, error) { +func (d *Decoder) Series(rec []byte, series []RefSeries) ([]RefSeries, error) { dec := encoding.Decbuf{B: rec} - if RecordType(dec.Byte()) != RecordSeries { + if Type(dec.Byte()) != Series { return nil, errors.New("invalid record type") } for len(dec.B) > 0 && dec.Err() == nil { @@ -89,10 +89,10 @@ func (d *RecordDecoder) Series(rec []byte, series []RefSeries) ([]RefSeries, err } // Samples appends samples in rec to the given slice. 
-func (d *RecordDecoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) { +func (d *Decoder) Samples(rec []byte, samples []RefSample) ([]RefSample, error) { dec := encoding.Decbuf{B: rec} - if RecordType(dec.Byte()) != RecordSamples { + if Type(dec.Byte()) != Samples { return nil, errors.New("invalid record type") } if dec.Len() == 0 { @@ -124,10 +124,10 @@ func (d *RecordDecoder) Samples(rec []byte, samples []RefSample) ([]RefSample, e } // Tombstones appends tombstones in rec to the given slice. -func (d *RecordDecoder) Tombstones(rec []byte, tstones []tombstones.Stone) ([]tombstones.Stone, error) { +func (d *Decoder) Tombstones(rec []byte, tstones []tombstones.Stone) ([]tombstones.Stone, error) { dec := encoding.Decbuf{B: rec} - if RecordType(dec.Byte()) != RecordTombstones { + if Type(dec.Byte()) != Tombstones { return nil, errors.New("invalid record type") } for dec.Len() > 0 && dec.Err() == nil { @@ -147,15 +147,15 @@ func (d *RecordDecoder) Tombstones(rec []byte, tstones []tombstones.Stone) ([]to return tstones, nil } -// RecordEncoder encodes series, sample, and tombstones records. +// Encoder encodes series, sample, and tombstones records. // The zero value is ready to use. -type RecordEncoder struct { +type Encoder struct { } // Series appends the encoded series to b and returns the resulting slice. -func (e *RecordEncoder) Series(series []RefSeries, b []byte) []byte { +func (e *Encoder) Series(series []RefSeries, b []byte) []byte { buf := encoding.Encbuf{B: b} - buf.PutByte(byte(RecordSeries)) + buf.PutByte(byte(Series)) for _, s := range series { buf.PutBE64(s.Ref) @@ -170,9 +170,9 @@ func (e *RecordEncoder) Series(series []RefSeries, b []byte) []byte { } // Samples appends the encoded samples to b and returns the resulting slice. 
-func (e *RecordEncoder) Samples(samples []RefSample, b []byte) []byte { +func (e *Encoder) Samples(samples []RefSample, b []byte) []byte { buf := encoding.Encbuf{B: b} - buf.PutByte(byte(RecordSamples)) + buf.PutByte(byte(Samples)) if len(samples) == 0 { return buf.Get() @@ -194,9 +194,9 @@ func (e *RecordEncoder) Samples(samples []RefSample, b []byte) []byte { } // Tombstones appends the encoded tombstones to b and returns the resulting slice. -func (e *RecordEncoder) Tombstones(tstones []tombstones.Stone, b []byte) []byte { +func (e *Encoder) Tombstones(tstones []tombstones.Stone, b []byte) []byte { buf := encoding.Encbuf{B: b} - buf.PutByte(byte(RecordTombstones)) + buf.PutByte(byte(Tombstones)) for _, s := range tstones { for _, iv := range s.Intervals { diff --git a/record/record_test.go b/record/record_test.go index b9705238..304fa2b2 100644 --- a/record/record_test.go +++ b/record/record_test.go @@ -25,8 +25,8 @@ import ( ) func TestRecord_EncodeDecode(t *testing.T) { - var enc RecordEncoder - var dec RecordDecoder + var enc Encoder + var dec Decoder series := []RefSeries{ { @@ -78,8 +78,8 @@ func TestRecord_EncodeDecode(t *testing.T) { // TestRecord_Corruputed ensures that corrupted records return the correct error. // Bugfix check for pull/521 and pull/523. 
func TestRecord_Corruputed(t *testing.T) { - var enc RecordEncoder - var dec RecordDecoder + var enc Encoder + var dec Decoder t.Run("Test corrupted series record", func(t *testing.T) { series := []RefSeries{ diff --git a/wal.go b/wal.go index 08cbedfe..abcfc88d 100644 --- a/wal.go +++ b/wal.go @@ -1255,7 +1255,7 @@ func MigrateWAL(logger log.Logger, dir string) (err error) { rdr := w.Reader() var ( - enc record.RecordEncoder + enc record.Encoder b []byte ) decErr := rdr.Read( diff --git a/wal/checkpoint.go b/wal/checkpoint.go index 1e9caa84..130c8357 100644 --- a/wal/checkpoint.go +++ b/wal/checkpoint.go @@ -153,8 +153,8 @@ func Checkpoint(w *WAL, from, to int, keep func(id uint64) bool, mint int64) (*C series []record.RefSeries samples []record.RefSample tstones []tombstones.Stone - dec record.RecordDecoder - enc record.RecordEncoder + dec record.Decoder + enc record.Encoder buf []byte recs [][]byte ) @@ -168,7 +168,7 @@ func Checkpoint(w *WAL, from, to int, keep func(id uint64) bool, mint int64) (*C rec := r.Record() switch dec.Type(rec) { - case record.RecordSeries: + case record.Series: series, err = dec.Series(rec, series) if err != nil { return nil, errors.Wrap(err, "decode series") @@ -186,7 +186,7 @@ func Checkpoint(w *WAL, from, to int, keep func(id uint64) bool, mint int64) (*C stats.TotalSeries += len(series) stats.DroppedSeries += len(series) - len(repl) - case record.RecordSamples: + case record.Samples: samples, err = dec.Samples(rec, samples) if err != nil { return nil, errors.Wrap(err, "decode samples") @@ -204,7 +204,7 @@ func Checkpoint(w *WAL, from, to int, keep func(id uint64) bool, mint int64) (*C stats.TotalSamples += len(samples) stats.DroppedSamples += len(samples) - len(repl) - case record.RecordTombstones: + case record.Tombstones: tstones, err = dec.Tombstones(rec, tstones) if err != nil { return nil, errors.Wrap(err, "decode deletes") diff --git a/wal/checkpoint_test.go b/wal/checkpoint_test.go index 37e52263..1d431ae7 100644 --- 
a/wal/checkpoint_test.go +++ b/wal/checkpoint_test.go @@ -94,7 +94,7 @@ func TestCheckpoint(t *testing.T) { testutil.Ok(t, os.RemoveAll(dir)) }() - var enc record.RecordEncoder + var enc record.Encoder // Create a dummy segment to bump the initial number. seg, err := CreateSegment(dir, 100) testutil.Ok(t, err) @@ -165,7 +165,7 @@ func TestCheckpoint(t *testing.T) { testutil.Ok(t, err) defer sr.Close() - var dec record.RecordDecoder + var dec record.Decoder var series []record.RefSeries r := NewReader(sr) @@ -173,10 +173,10 @@ func TestCheckpoint(t *testing.T) { rec := r.Record() switch dec.Type(rec) { - case record.RecordSeries: + case record.Series: series, err = dec.Series(rec, series) testutil.Ok(t, err) - case record.RecordSamples: + case record.Samples: samples, err := dec.Samples(rec, nil) testutil.Ok(t, err) for _, s := range samples { diff --git a/wal/wal_watcher.go b/wal/wal_watcher.go index bc21994d..ad998cea 100644 --- a/wal/wal_watcher.go +++ b/wal/wal_watcher.go @@ -99,8 +99,8 @@ type writeTo interface { SeriesReset(int) } -// WALWatcher watches the TSDB WAL for a given WriteTo. -type WALWatcher struct { +// Watcher watches the TSDB WAL for a given WriteTo. +type Watcher struct { name string writer writeTo logger log.Logger @@ -122,20 +122,20 @@ type WALWatcher struct { maxSegment int } -// NewWALWatcher creates a new WAL watcher for a given WriteTo. -func NewWALWatcher(logger log.Logger, reg prometheus.Registerer, name string, writer writeTo, walDir string) *WALWatcher { +// NewWatcher creates a new WAL watcher for a given WriteTo. +func NewWatcher(logger log.Logger, reg prometheus.Registerer, name string, writer writeTo, walDir string) *Watcher { if logger == nil { logger = log.NewNopLogger() } if reg != nil { - // We can't use MustRegister because WALWatcher's are recreated on config changes within Prometheus. + // We can't use MustRegister because Watcher's are recreated on config changes within Prometheus. 
reg.Register(watcherRecordsRead) reg.Register(watcherRecordDecodeFails) reg.Register(watcherSamplesSentPreTailing) reg.Register(watcherCurrentSegment) } - return &WALWatcher{ + return &Watcher{ logger: logger, reg: reg, writer: writer, @@ -148,7 +148,7 @@ func NewWALWatcher(logger log.Logger, reg prometheus.Registerer, name string, wr } } -func (w *WALWatcher) setMetrics() { +func (w *Watcher) setMetrics() { // Setup the WAL Watchers metrics. We do this here rather than in the // constructor because of the ordering of creating Queue Managers's, // stopping them, and then starting new ones in storage/remote/storage.go ApplyConfig. @@ -158,16 +158,16 @@ func (w *WALWatcher) setMetrics() { w.currentSegmentMetric = watcherCurrentSegment.WithLabelValues(w.name) } -// Start the WALWatcher. -func (w *WALWatcher) Start() { +// Start the Watcher. +func (w *Watcher) Start() { w.setMetrics() level.Info(w.logger).Log("msg", "starting WAL watcher", "queue", w.name) go w.loop() } -// Stop the WALWatcher. -func (w *WALWatcher) Stop() { +// Stop the Watcher. +func (w *Watcher) Stop() { close(w.quit) <-w.done @@ -181,7 +181,7 @@ func (w *WALWatcher) Stop() { level.Info(w.logger).Log("msg", "WAL watcher stopped", "queue", w.name) } -func (w *WALWatcher) loop() { +func (w *Watcher) loop() { defer close(w.done) // We may encourter failures processing the WAL; we should wait and retry. @@ -199,7 +199,7 @@ func (w *WALWatcher) loop() { } } -func (w *WALWatcher) run() error { +func (w *Watcher) run() error { _, lastSegment, err := w.firstAndLast() if err != nil { return errors.Wrap(err, "wal.Segments") @@ -246,7 +246,7 @@ func (w *WALWatcher) run() error { } // findSegmentForIndex finds the first segment greater than or equal to index. 
-func (w *WALWatcher) findSegmentForIndex(index int) (int, error) { +func (w *Watcher) findSegmentForIndex(index int) (int, error) { refs, err := w.segments(w.walDir) if err != nil { return -1, nil @@ -261,7 +261,7 @@ func (w *WALWatcher) findSegmentForIndex(index int) (int, error) { return -1, errors.New("failed to find segment for index") } -func (w *WALWatcher) firstAndLast() (int, int, error) { +func (w *Watcher) firstAndLast() (int, int, error) { refs, err := w.segments(w.walDir) if err != nil { return -1, -1, nil @@ -275,7 +275,7 @@ func (w *WALWatcher) firstAndLast() (int, int, error) { // Copied from tsdb/wal/wal.go so we do not have to open a WAL. // Plan is to move WAL watcher to TSDB and dedupe these implementations. -func (w *WALWatcher) segments(dir string) ([]int, error) { +func (w *Watcher) segments(dir string) ([]int, error) { files, err := fileutil.ReadDir(dir) if err != nil { return nil, err @@ -302,7 +302,7 @@ func (w *WALWatcher) segments(dir string) ([]int, error) { // Use tail true to indicate that the reader is currently on a segment that is // actively being written to. If false, assume it's a full segment and we're // replaying it on start to cache the series records. 
-func (w *WALWatcher) watch(segmentNum int, tail bool) error { +func (w *Watcher) watch(segmentNum int, tail bool) error { segment, err := OpenReadSegment(SegmentName(w.walDir, segmentNum)) if err != nil { return err @@ -397,7 +397,7 @@ func (w *WALWatcher) watch(segmentNum int, tail bool) error { } } -func (w *WALWatcher) garbageCollectSeries(segmentNum int) error { +func (w *Watcher) garbageCollectSeries(segmentNum int) error { dir, _, err := LastCheckpoint(w.walDir) if err != nil && err != record.ErrNotFound { return errors.Wrap(err, "LastCheckpoint") @@ -429,19 +429,19 @@ func (w *WALWatcher) garbageCollectSeries(segmentNum int) error { return nil } -func (w *WALWatcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { +func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { var ( - dec record.RecordDecoder + dec record.Decoder series []record.RefSeries samples []record.RefSample ) for r.Next() && !isClosed(w.quit) { rec := r.Record() - w.recordsReadMetric.WithLabelValues(recordType(dec.Type(rec))).Inc() + w.recordsReadMetric.WithLabelValues(Type(dec.Type(rec))).Inc() switch dec.Type(rec) { - case record.RecordSeries: + case record.Series: series, err := dec.Series(rec, series[:0]) if err != nil { w.recordDecodeFailsMetric.Inc() @@ -449,7 +449,7 @@ func (w *WALWatcher) readSegment(r *LiveReader, segmentNum int, tail bool) error } w.writer.StoreSeries(series, segmentNum) - case record.RecordSamples: + case record.Samples: // If we're not tailing a segment we can ignore any samples records we see. // This speeds up replay of the WAL by > 10x. 
if !tail { @@ -471,9 +471,9 @@ func (w *WALWatcher) readSegment(r *LiveReader, segmentNum int, tail bool) error w.writer.Append(send) } - case record.RecordTombstones: + case record.Tombstones: // noop - case record.RecordInvalid: + case record.Invalid: return errors.New("invalid record") default: @@ -484,15 +484,15 @@ func (w *WALWatcher) readSegment(r *LiveReader, segmentNum int, tail bool) error return r.Err() } -func recordType(rt record.RecordType) string { +func Type(rt record.Type) string { switch rt { - case record.RecordInvalid: + case record.Invalid: return "invalid" - case record.RecordSeries: + case record.Series: return "series" - case record.RecordSamples: + case record.Samples: return "samples" - case record.RecordTombstones: + case record.Tombstones: return "tombstones" default: return "unknown" @@ -500,7 +500,7 @@ func recordType(rt record.RecordType) string { } // Read all the series records from a Checkpoint directory. -func (w *WALWatcher) readCheckpoint(checkpointDir string) error { +func (w *Watcher) readCheckpoint(checkpointDir string) error { level.Debug(w.logger).Log("msg", "reading checkpoint", "dir", checkpointDir) index, err := checkpointNum(checkpointDir) if err != nil { diff --git a/wal/wal_watcher_test.go b/wal/wal_watcher_test.go index 00129a23..377d93d9 100644 --- a/wal/wal_watcher_test.go +++ b/wal/wal_watcher_test.go @@ -102,7 +102,7 @@ func TestTailSamples(t *testing.T) { err = os.Mkdir(wdir, 0777) testutil.Ok(t, err) - enc := record.RecordEncoder{} + enc := record.Encoder{} w, err := NewSize(nil, prometheus.DefaultRegisterer, wdir, 128*pageSize, false) testutil.Ok(t, err) @@ -135,7 +135,7 @@ func TestTailSamples(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, nil, "", wt, dir) watcher.startTime = now.UnixNano() // Set the Watcher's metrics so they're not nil pointers. 
@@ -176,7 +176,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) { var recs [][]byte - enc := record.RecordEncoder{} + enc := record.Encoder{} for i := 0; i < seriesCount; i++ { series := enc.Series([]record.RefSeries{ @@ -210,7 +210,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, nil, "", wt, dir) go watcher.Start() expected := seriesCount @@ -236,7 +236,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { err = os.Mkdir(wdir, 0777) testutil.Ok(t, err) - enc := record.RecordEncoder{} + enc := record.Encoder{} w, err := NewSize(nil, nil, wdir, segmentSize, false) testutil.Ok(t, err) @@ -292,7 +292,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { _, _, err = w.Segments() testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, nil, "", wt, dir) go watcher.Start() expected := seriesCount * 2 @@ -318,7 +318,7 @@ func TestReadCheckpoint(t *testing.T) { os.Create(SegmentName(wdir, 30)) - enc := record.RecordEncoder{} + enc := record.Encoder{} w, err := NewSize(nil, nil, wdir, 128*pageSize, false) testutil.Ok(t, err) @@ -353,7 +353,7 @@ func TestReadCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, nil, "", wt, dir) // watcher. go watcher.Start() @@ -380,7 +380,7 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { err = os.Mkdir(wdir, 0777) testutil.Ok(t, err) - enc := record.RecordEncoder{} + enc := record.Encoder{} w, err := NewSize(nil, nil, wdir, pageSize, false) testutil.Ok(t, err) @@ -415,7 +415,7 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { }, 0) wt := newWriteToMock() - watcher := NewWALWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, nil, "", wt, dir) watcher.maxSegment = -1 // Set the Watcher's metrics so they're not nil pointers. 
@@ -443,7 +443,7 @@ func TestCheckpointSeriesReset(t *testing.T) { err = os.Mkdir(wdir, 0777) testutil.Ok(t, err) - enc := record.RecordEncoder{} + enc := record.Encoder{} w, err := NewSize(nil, nil, wdir, segmentSize, false) testutil.Ok(t, err) @@ -475,7 +475,7 @@ func TestCheckpointSeriesReset(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWALWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, nil, "", wt, dir) watcher.maxSegment = -1 go watcher.Start() diff --git a/wal_test.go b/wal_test.go index c84a899d..1a18e2d2 100644 --- a/wal_test.go +++ b/wal_test.go @@ -512,7 +512,7 @@ func TestMigrateWAL_Fuzz(t *testing.T) { testutil.Ok(t, err) // We can properly write some new data after migration. - var enc record.RecordEncoder + var enc record.Encoder testutil.Ok(t, w.Log(enc.Samples([]record.RefSample{ {Ref: 500, T: 1, V: 1}, }, nil))) @@ -525,21 +525,21 @@ func TestMigrateWAL_Fuzz(t *testing.T) { r := wal.NewReader(sr) var res []interface{} - var dec record.RecordDecoder + var dec record.Decoder for r.Next() { rec := r.Record() switch dec.Type(rec) { - case record.RecordSeries: + case record.Series: s, err := dec.Series(rec, nil) testutil.Ok(t, err) res = append(res, s) - case record.RecordSamples: + case record.Samples: s, err := dec.Samples(rec, nil) testutil.Ok(t, err) res = append(res, s) - case record.RecordTombstones: + case record.Tombstones: s, err := dec.Tombstones(rec, nil) testutil.Ok(t, err) res = append(res, s) From fe0139201564ffd608898ad8e5c8cfafbeab86c0 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Thu, 4 Jul 2019 13:13:20 -0700 Subject: [PATCH 11/16] Rename WALWatcher -> Watcher; fix creation/registration of it's metrics. 
Signed-off-by: Callum Styan --- wal/{wal_watcher.go => watcher.go} | 148 ++++++++++--------- wal/{wal_watcher_test.go => watcher_test.go} | 12 +- 2 files changed, 87 insertions(+), 73 deletions(-) rename wal/{wal_watcher.go => watcher.go} (82%) rename wal/{wal_watcher_test.go => watcher_test.go} (95%) diff --git a/wal/wal_watcher.go b/wal/watcher.go similarity index 82% rename from wal/wal_watcher.go rename to wal/watcher.go index ad998cea..b3732aab 100644 --- a/wal/wal_watcher.go +++ b/wal/watcher.go @@ -39,43 +39,14 @@ const ( consumer = "consumer" ) +type watcherMetrics struct { + recordsRead *prometheus.CounterVec + recordDecodeFails *prometheus.CounterVec + samplesSentPreTailing *prometheus.CounterVec + currentSegment *prometheus.GaugeVec +} + var ( - watcherRecordsRead = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: "prometheus", - Subsystem: "wal_watcher", - Name: "records_read_total", - Help: "Number of records read by the WAL watcher from the WAL.", - }, - []string{consumer, "type"}, - ) - watcherRecordDecodeFails = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: "prometheus", - Subsystem: "wal_watcher", - Name: "record_decode_failures_total", - Help: "Number of records read by the WAL watcher that resulted in an error when decoding.", - }, - []string{consumer}, - ) - watcherSamplesSentPreTailing = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Namespace: "prometheus", - Subsystem: "wal_watcher", - Name: "samples_sent_pre_tailing_total", - Help: "Number of sample records read by the WAL watcher and sent to remote write during replay of existing WAL.", - }, - []string{consumer}, - ) - watcherCurrentSegment = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: "prometheus", - Subsystem: "wal_watcher", - Name: "current_segment", - Help: "Current segment the WAL watcher is reading records from.", - }, - []string{consumer}, - ) lrMetrics = NewLiveReaderMetrics(prometheus.DefaultRegisterer) ) @@ -86,12 +57,12 
@@ func FromTime(t time.Time) int64 { return t.Unix()*1000 + int64(t.Nanosecond())/int64(time.Millisecond) } -func init() { - prometheus.MustRegister(watcherRecordsRead) - prometheus.MustRegister(watcherRecordDecodeFails) - prometheus.MustRegister(watcherSamplesSentPreTailing) - prometheus.MustRegister(watcherCurrentSegment) -} +// func init() { +// prometheus.MustRegister(watcherRecordsRead) +// prometheus.MustRegister(watcherRecordDecodeFails) +// prometheus.MustRegister(watcherSamplesSentPreTailing) +// prometheus.MustRegister(watcherCurrentSegment) +// } type writeTo interface { Append([]record.RefSample) bool @@ -106,7 +77,7 @@ type Watcher struct { logger log.Logger walDir string lastCheckpoint string - reg prometheus.Registerer + metrics *watcherMetrics startTime int64 @@ -122,27 +93,70 @@ type Watcher struct { maxSegment int } +func NewWatcherMetrics(reg prometheus.Registerer) *watcherMetrics { + m := &watcherMetrics{ + recordsRead: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "records_read_total", + Help: "Number of records read by the WAL watcher from the WAL.", + }, + []string{consumer, "type"}, + ), + recordDecodeFails: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "record_decode_failures_total", + Help: "Number of records read by the WAL watcher that resulted in an error when decoding.", + }, + []string{consumer}, + ), + samplesSentPreTailing: prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "samples_sent_pre_tailing_total", + Help: "Number of sample records read by the WAL watcher and sent to remote write during replay of existing WAL.", + }, + []string{consumer}, + ), + currentSegment: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: "prometheus", + Subsystem: "wal_watcher", + Name: "current_segment", + Help: "Current segment the WAL 
watcher is reading records from.", + }, + []string{consumer}, + ), + } + + if reg != nil { + reg.Register(m.recordsRead) + reg.Register(m.recordDecodeFails) + reg.Register(m.samplesSentPreTailing) + reg.Register(m.currentSegment) + } + + return m +} + // NewWatcher creates a new WAL watcher for a given WriteTo. -func NewWatcher(logger log.Logger, reg prometheus.Registerer, name string, writer writeTo, walDir string) *Watcher { +func NewWatcher(logger log.Logger, metrics *watcherMetrics, name string, writer writeTo, walDir string) *Watcher { if logger == nil { logger = log.NewNopLogger() } - if reg != nil { - // We can't use MustRegister because Watcher's are recreated on config changes within Prometheus. - reg.Register(watcherRecordsRead) - reg.Register(watcherRecordDecodeFails) - reg.Register(watcherSamplesSentPreTailing) - reg.Register(watcherCurrentSegment) - } return &Watcher{ - logger: logger, - reg: reg, - writer: writer, - walDir: path.Join(walDir, "wal"), - name: name, - quit: make(chan struct{}), - done: make(chan struct{}), + logger: logger, + metrics: metrics, + writer: writer, + walDir: path.Join(walDir, "wal"), + name: name, + quit: make(chan struct{}), + done: make(chan struct{}), maxSegment: -1, } @@ -152,10 +166,10 @@ func (w *Watcher) setMetrics() { // Setup the WAL Watchers metrics. We do this here rather than in the // constructor because of the ordering of creating Queue Managers's, // stopping them, and then starting new ones in storage/remote/storage.go ApplyConfig. 
- w.recordsReadMetric = watcherRecordsRead.MustCurryWith(prometheus.Labels{consumer: w.name}) - w.recordDecodeFailsMetric = watcherRecordDecodeFails.WithLabelValues(w.name) - w.samplesSentPreTailing = watcherSamplesSentPreTailing.WithLabelValues(w.name) - w.currentSegmentMetric = watcherCurrentSegment.WithLabelValues(w.name) + w.recordsReadMetric = w.metrics.recordsRead.MustCurryWith(prometheus.Labels{consumer: w.name}) + w.recordDecodeFailsMetric = w.metrics.recordDecodeFails.WithLabelValues(w.name) + w.samplesSentPreTailing = w.metrics.samplesSentPreTailing.WithLabelValues(w.name) + w.currentSegmentMetric = w.metrics.currentSegment.WithLabelValues(w.name) } // Start the Watcher. @@ -172,11 +186,11 @@ func (w *Watcher) Stop() { <-w.done // Records read metric has series and samples. - watcherRecordsRead.DeleteLabelValues(w.name, "series") - watcherRecordsRead.DeleteLabelValues(w.name, "samples") - watcherRecordDecodeFails.DeleteLabelValues(w.name) - watcherSamplesSentPreTailing.DeleteLabelValues(w.name) - watcherCurrentSegment.DeleteLabelValues(w.name) + w.metrics.recordsRead.DeleteLabelValues(w.name, "series") + w.metrics.recordsRead.DeleteLabelValues(w.name, "samples") + w.metrics.recordDecodeFails.DeleteLabelValues(w.name) + w.metrics.samplesSentPreTailing.DeleteLabelValues(w.name) + w.metrics.currentSegment.DeleteLabelValues(w.name) level.Info(w.logger).Log("msg", "WAL watcher stopped", "queue", w.name) } diff --git a/wal/wal_watcher_test.go b/wal/watcher_test.go similarity index 95% rename from wal/wal_watcher_test.go rename to wal/watcher_test.go index 377d93d9..ab80db85 100644 --- a/wal/wal_watcher_test.go +++ b/wal/watcher_test.go @@ -135,7 +135,7 @@ func TestTailSamples(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) watcher.startTime = now.UnixNano() // Set the Watcher's metrics so they're not nil pointers. 
@@ -210,7 +210,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) go watcher.Start() expected := seriesCount @@ -292,7 +292,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { _, _, err = w.Segments() testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) go watcher.Start() expected := seriesCount * 2 @@ -353,7 +353,7 @@ func TestReadCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) // watcher. go watcher.Start() @@ -415,7 +415,7 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { }, 0) wt := newWriteToMock() - watcher := NewWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) watcher.maxSegment = -1 // Set the Watcher's metrics so they're not nil pointers. @@ -475,7 +475,7 @@ func TestCheckpointSeriesReset(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(nil, nil, "", wt, dir) + watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) watcher.maxSegment = -1 go watcher.Start() From 7e36b01db9c314e8cce533305e8b601af6627512 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 10 Jul 2019 11:19:40 -0700 Subject: [PATCH 12/16] Review fixes; mostly some things that don't need to be exported after refactor. 
Signed-off-by: Callum Styan --- head.go | 142 +++++++++++++-------------------------------- head_test.go | 62 ++++++++++---------- querier.go | 2 +- record/internal.go | 37 ------------ record/record.go | 18 ++++++ 5 files changed, 89 insertions(+), 172 deletions(-) delete mode 100644 record/internal.go diff --git a/head.go b/head.go index 26c45402..c309aba3 100644 --- a/head.go +++ b/head.go @@ -282,7 +282,7 @@ func (h *Head) processWALSamples( } refSeries[s.Ref] = ms } - _, chunkCreated := ms.Append(s.T, s.V) + _, chunkCreated := ms.append(s.T, s.V) if chunkCreated { h.metrics.chunksCreated.Inc() h.metrics.chunks.Inc() @@ -872,7 +872,7 @@ func (a *headAppender) AddFast(ref uint64, t int64, v float64) error { return errors.Wrap(ErrNotFound, "unknown series") } s.Lock() - if err := s.Appendable(t, v); err != nil { + if err := s.appendable(t, v); err != nil { s.Unlock() return err } @@ -939,7 +939,7 @@ func (a *headAppender) Commit() error { for i, s := range a.samples { series = a.sampleSeries[i] series.Lock() - ok, chunkCreated := series.Append(s.T, s.V) + ok, chunkCreated := series.append(s.T, s.V) series.pendingCommit = false series.Unlock() @@ -968,6 +968,7 @@ func (a *headAppender) Rollback() error { series.Unlock() } a.head.putAppendBuffer(a.samples) + a.head.putSeriesBuffer(a.sampleSeries) // Series are created in the head memory regardless of rollback. Thus we have // to log them to the WAL in any case. 
@@ -993,7 +994,7 @@ func (h *Head) Delete(mint, maxt int64, ms ...labels.Matcher) error { for p.Next() { series := h.series.getByID(p.At()) - t0, t1 := series.MinTime(), series.MaxTime() + t0, t1 := series.minTime(), series.maxTime() if t0 == math.MinInt64 || t1 == math.MinInt64 { continue } @@ -1041,14 +1042,14 @@ func (h *Head) chunkRewrite(ref uint64, dranges tombstones.Intervals) (err error return nil } - metas := ms.ChunksMetas() + metas := ms.chunksMetas() mint, maxt := metas[0].MinTime, metas[len(metas)-1].MaxTime it := newChunkSeriesIterator(metas, dranges, mint, maxt) - ms.Reset() + ms.reset() for it.Next() { t, v := it.At() - ok, _ := ms.Append(t, v) + ok, _ := ms.append(t, v) if !ok { level.Warn(h.logger).Log("msg", "failed to add sample during delete") } @@ -1230,7 +1231,7 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { } s.Lock() - c := s.Chunk(int(cid)) + c := s.chunk(int(cid)) // This means that the chunk has been garbage collected or is outside // the specified range. @@ -1241,7 +1242,7 @@ func (h *headChunkReader) Chunk(ref uint64) (chunkenc.Chunk, error) { s.Unlock() return &safeChunk{ - Chunk: c.Chunk, + Chunk: c.chunk, s: s, cid: int(cid), }, nil @@ -1367,7 +1368,7 @@ func (h *headIndexReader) Series(ref uint64, lbls *labels.Labels, chks *[]chunks continue } // Set the head chunks as open (being appended to). 
- maxTime := c.MaxTime + maxTime := c.maxTime if s.headChunk == c { maxTime = math.MaxInt64 } @@ -1526,7 +1527,7 @@ func (s *stripeSeries) gc(mint int64) (map[uint64]struct{}, int) { for hash, all := range s.hashes[i] { for _, series := range all { series.Lock() - rmChunks += series.TruncateChunksBefore(mint) + rmChunks += series.truncateChunksBefore(mint) if len(series.chunks) > 0 || series.pendingCommit { series.Unlock() @@ -1621,11 +1622,10 @@ func (s sample) V() float64 { type memSeries struct { sync.Mutex - ref uint64 - chunks []*memChunk - lset labels.Labels - headChunk *memChunk - + ref uint64 + chunks []*memChunk + lset labels.Labels + headChunk *memChunk chunkRange int64 firstChunkID int @@ -1646,26 +1646,26 @@ func newMemSeries(lset labels.Labels, id uint64, chunkRange int64) *memSeries { return s } -func (s *memSeries) MinTime() int64 { +func (s *memSeries) minTime() int64 { if len(s.chunks) == 0 { return math.MinInt64 } - return s.chunks[0].MinTime + return s.chunks[0].minTime } -func (s *memSeries) MaxTime() int64 { +func (s *memSeries) maxTime() int64 { c := s.head() if c == nil { return math.MinInt64 } - return c.MaxTime + return c.maxTime } func (s *memSeries) cut(mint int64) *memChunk { c := &memChunk{ - Chunk: chunkenc.NewXORChunk(), - MinTime: mint, - MaxTime: math.MinInt64, + chunk: chunkenc.NewXORChunk(), + minTime: mint, + maxTime: math.MinInt64, } s.chunks = append(s.chunks, c) s.headChunk = c @@ -1674,7 +1674,7 @@ func (s *memSeries) cut(mint int64) *memChunk { // may be chosen dynamically at a later point. 
s.nextAt = rangeForTimestamp(mint, s.chunkRange) - app, err := c.Chunk.Appender() + app, err := c.chunk.Appender() if err != nil { panic(err) } @@ -1682,17 +1682,17 @@ func (s *memSeries) cut(mint int64) *memChunk { return c } -func (s *memSeries) ChunksMetas() []chunks.Meta { +func (s *memSeries) chunksMetas() []chunks.Meta { metas := make([]chunks.Meta, 0, len(s.chunks)) for _, chk := range s.chunks { - metas = append(metas, chunks.Meta{Chunk: chk.Chunk, MinTime: chk.MinTime, MaxTime: chk.MaxTime}) + metas = append(metas, chunks.Meta{Chunk: chk.chunk, MinTime: chk.minTime, MaxTime: chk.maxTime}) } return metas } // reset re-initialises all the variable in the memSeries except 'lset', 'ref', // and 'chunkRange', like how it would appear after 'newmemSeries(...)'. -func (s *memSeries) Reset() { +func (s *memSeries) reset() { s.chunks = nil s.headChunk = nil s.firstChunkID = 0 @@ -1703,16 +1703,16 @@ func (s *memSeries) Reset() { } // Appendable checks whether the given sample is valid for appending to the series. -func (s *memSeries) Appendable(t int64, v float64) error { +func (s *memSeries) appendable(t int64, v float64) error { c := s.head() if c == nil { return nil } - if t > c.MaxTime { + if t > c.maxTime { return nil } - if t < c.MaxTime { + if t < c.maxTime { return ErrOutOfOrderSample } // We are allowing exact duplicates as we can encounter them in valid cases @@ -1723,7 +1723,7 @@ func (s *memSeries) Appendable(t int64, v float64) error { return nil } -func (s *memSeries) Chunk(id int) *memChunk { +func (s *memSeries) chunk(id int) *memChunk { ix := id - s.firstChunkID if ix < 0 || ix >= len(s.chunks) { return nil @@ -1731,16 +1731,16 @@ func (s *memSeries) Chunk(id int) *memChunk { return s.chunks[ix] } -func (s *memSeries) ChunkID(pos int) int { +func (s *memSeries) chunkID(pos int) int { return pos + s.firstChunkID } // TruncateChunksBefore removes all chunks from the series that have not timestamp // at or after mint. Chunk IDs remain unchanged. 
-func (s *memSeries) TruncateChunksBefore(mint int64) (removed int) { +func (s *memSeries) truncateChunksBefore(mint int64) (removed int) { var k int for i, c := range s.chunks { - if c.MaxTime >= mint { + if c.maxTime >= mint { break } k = i + 1 @@ -1757,7 +1757,7 @@ func (s *memSeries) TruncateChunksBefore(mint int64) (removed int) { } // Append adds the sample (t, v) to the series. -func (s *memSeries) Append(t int64, v float64) (success, chunkCreated bool) { +func (s *memSeries) append(t int64, v float64) (success, chunkCreated bool) { // Based on Gorilla white papers this offers near-optimal compression ratio // so anything bigger that this has diminishing returns and increases // the time range within which we have to decompress all samples. @@ -1769,17 +1769,17 @@ func (s *memSeries) Append(t int64, v float64) (success, chunkCreated bool) { c = s.cut(t) chunkCreated = true } - numSamples := c.Chunk.NumSamples() + numSamples := c.chunk.NumSamples() // Out of order sample. - if c.MaxTime >= t { + if c.maxTime >= t { return false, chunkCreated } // If we reach 25% of a chunk's desired sample count, set a definitive time // at which to start the next chunk. // At latest it must happen at the timestamp set when the chunk was cut. if numSamples == samplesPerChunk/4 { - s.nextAt = computeChunkEndTime(c.MinTime, c.MaxTime, s.nextAt) + s.nextAt = computeChunkEndTime(c.minTime, c.maxTime, s.nextAt) } if t >= s.nextAt { c = s.cut(t) @@ -1787,7 +1787,7 @@ func (s *memSeries) Append(t int64, v float64) (success, chunkCreated bool) { } s.app.Append(t, v) - c.MaxTime = t + c.maxTime = t s.sampleBuf[0] = s.sampleBuf[1] s.sampleBuf[1] = s.sampleBuf[2] @@ -1797,70 +1797,6 @@ func (s *memSeries) Append(t int64, v float64) (success, chunkCreated bool) { return true, chunkCreated } -func (s *memSeries) Iterator(id int) chunkenc.Iterator { - c := s.Chunk(id) - // TODO(fabxc): Work around! 
A querier may have retrieved a pointer to a series' chunk, - // which got then garbage collected before it got accessed. - // We must ensure to not garbage collect as long as any readers still hold a reference. - if c == nil { - return chunkenc.NewNopIterator() - } - - if id-s.firstChunkID < len(s.chunks)-1 { - return c.Chunk.Iterator() - } - // Serve the last 4 samples for the last chunk from the sample buffer - // as their compressed bytes may be mutated by added samples. - it := &memSafeIterator{ - Iterator: c.Chunk.Iterator(), - i: -1, - total: c.Chunk.NumSamples(), - buf: s.sampleBuf, - } - return it -} - -func (s *memSeries) head() *memChunk { - return s.headChunk -} - -type memChunk struct { - Chunk chunkenc.Chunk - MinTime, MaxTime int64 -} - -// Returns true if the chunk overlaps [mint, maxt]. -func (mc *memChunk) OverlapsClosedInterval(mint, maxt int64) bool { - return mc.MinTime <= maxt && mint <= mc.MaxTime -} - -type memSafeIterator struct { - chunkenc.Iterator - - i int - total int - buf [4]sample -} - -func (it *memSafeIterator) Next() bool { - if it.i+1 >= it.total { - return false - } - it.i++ - if it.total-it.i > 4 { - return it.Iterator.Next() - } - return true -} - -func (it *memSafeIterator) At() (int64, float64) { - if it.total-it.i > 4 { - return it.Iterator.At() - } - s := it.buf[4-(it.total-it.i)] - return s.t, s.v -} - // computeChunkEndTime estimates the end timestamp based the beginning of a chunk, // its current timestamp and the upper bound up to which we insert data. // It assumes that the time range is 1/4 full. 
diff --git a/head_test.go b/head_test.go index c3a2da71..457076ab 100644 --- a/head_test.go +++ b/head_test.go @@ -226,18 +226,18 @@ func TestHead_Truncate(t *testing.T) { s4, _ := h.getOrCreate(4, labels.FromStrings("a", "2", "b", "2", "c", "1")) s1.chunks = []*memChunk{ - {MinTime: 0, MaxTime: 999}, - {MinTime: 1000, MaxTime: 1999}, - {MinTime: 2000, MaxTime: 2999}, + {minTime: 0, maxTime: 999}, + {minTime: 1000, maxTime: 1999}, + {minTime: 2000, maxTime: 2999}, } s2.chunks = []*memChunk{ - {MinTime: 1000, MaxTime: 1999}, - {MinTime: 2000, MaxTime: 2999}, - {MinTime: 3000, MaxTime: 3999}, + {minTime: 1000, maxTime: 1999}, + {minTime: 2000, maxTime: 2999}, + {minTime: 3000, maxTime: 3999}, } s3.chunks = []*memChunk{ - {MinTime: 0, MaxTime: 999}, - {MinTime: 1000, MaxTime: 1999}, + {minTime: 0, maxTime: 999}, + {minTime: 1000, maxTime: 1999}, } s4.chunks = []*memChunk{} @@ -247,12 +247,12 @@ func TestHead_Truncate(t *testing.T) { testutil.Ok(t, h.Truncate(2000)) testutil.Equals(t, []*memChunk{ - {MinTime: 2000, MaxTime: 2999}, + {minTime: 2000, maxTime: 2999}, }, h.series.getByID(s1.ref).chunks) testutil.Equals(t, []*memChunk{ - {MinTime: 2000, MaxTime: 2999}, - {MinTime: 3000, MaxTime: 3999}, + {minTime: 2000, maxTime: 2999}, + {minTime: 3000, maxTime: 3999}, }, h.series.getByID(s2.ref).chunks) testutil.Assert(t, h.series.getByID(s3.ref) == nil, "") @@ -293,25 +293,25 @@ func TestMemSeries_truncateChunks(t *testing.T) { s := newMemSeries(labels.FromStrings("a", "b"), 1, 2000) for i := 0; i < 4000; i += 5 { - ok, _ := s.Append(int64(i), float64(i)) + ok, _ := s.append(int64(i), float64(i)) testutil.Assert(t, ok == true, "sample append failed") } // Check that truncate removes half of the chunks and afterwards // that the ID of the last chunk still gives us the same chunk afterwards. 
countBefore := len(s.chunks) - lastID := s.ChunkID(countBefore - 1) - lastChunk := s.Chunk(lastID) + lastID := s.chunkID(countBefore - 1) + lastChunk := s.chunk(lastID) - testutil.Assert(t, s.Chunk(0) != nil, "") + testutil.Assert(t, s.chunk(0) != nil, "") testutil.Assert(t, lastChunk != nil, "") - s.TruncateChunksBefore(2000) + s.truncateChunksBefore(2000) - testutil.Equals(t, int64(2000), s.chunks[0].MinTime) - testutil.Assert(t, s.Chunk(0) == nil, "first chunks not gone") + testutil.Equals(t, int64(2000), s.chunks[0].minTime) + testutil.Assert(t, s.chunk(0) == nil, "first chunks not gone") testutil.Equals(t, countBefore/2, len(s.chunks)) - testutil.Equals(t, lastChunk, s.Chunk(lastID)) + testutil.Equals(t, lastChunk, s.chunk(lastID)) // Validate that the series' sample buffer is applied correctly to the last chunk // after truncation. @@ -859,29 +859,29 @@ func TestMemSeries_append(t *testing.T) { // Add first two samples at the very end of a chunk range and the next two // on and after it. // New chunk must correctly be cut at 1000. 
- ok, chunkCreated := s.Append(998, 1) + ok, chunkCreated := s.append(998, 1) testutil.Assert(t, ok, "append failed") testutil.Assert(t, chunkCreated, "first sample created chunk") - ok, chunkCreated = s.Append(999, 2) + ok, chunkCreated = s.append(999, 2) testutil.Assert(t, ok, "append failed") testutil.Assert(t, !chunkCreated, "second sample should use same chunk") - ok, chunkCreated = s.Append(1000, 3) + ok, chunkCreated = s.append(1000, 3) testutil.Assert(t, ok, "append failed") testutil.Assert(t, chunkCreated, "expected new chunk on boundary") - ok, chunkCreated = s.Append(1001, 4) + ok, chunkCreated = s.append(1001, 4) testutil.Assert(t, ok, "append failed") testutil.Assert(t, !chunkCreated, "second sample should use same chunk") - testutil.Assert(t, s.chunks[0].MinTime == 998 && s.chunks[0].MaxTime == 999, "wrong chunk range") - testutil.Assert(t, s.chunks[1].MinTime == 1000 && s.chunks[1].MaxTime == 1001, "wrong chunk range") + testutil.Assert(t, s.chunks[0].minTime == 998 && s.chunks[0].maxTime == 999, "wrong chunk range") + testutil.Assert(t, s.chunks[1].minTime == 1000 && s.chunks[1].maxTime == 1001, "wrong chunk range") // Fill the range [1000,2000) with many samples. Intermediate chunks should be cut // at approximately 120 samples per chunk. for i := 1; i < 1000; i++ { - ok, _ := s.Append(1001+int64(i), float64(i)) + ok, _ := s.append(1001+int64(i), float64(i)) testutil.Assert(t, ok, "append failed") } @@ -889,7 +889,7 @@ func TestMemSeries_append(t *testing.T) { // All chunks but the first and last should now be moderately full. 
for i, c := range s.chunks[1 : len(s.chunks)-1] { - testutil.Assert(t, c.Chunk.NumSamples() > 100, "unexpected small chunk %d of length %d", i, c.Chunk.NumSamples()) + testutil.Assert(t, c.chunk.NumSamples() > 100, "unexpected small chunk %d of length %d", i, c.chunk.NumSamples()) } } @@ -903,8 +903,8 @@ func TestGCChunkAccess(t *testing.T) { s, _ := h.getOrCreate(1, labels.FromStrings("a", "1")) s.chunks = []*memChunk{ - {MinTime: 0, MaxTime: 999}, - {MinTime: 1000, MaxTime: 1999}, + {minTime: 0, maxTime: 999}, + {minTime: 1000, maxTime: 1999}, } idx := h.indexRange(0, 1500) @@ -943,8 +943,8 @@ func TestGCSeriesAccess(t *testing.T) { s, _ := h.getOrCreate(1, labels.FromStrings("a", "1")) s.chunks = []*memChunk{ - {MinTime: 0, MaxTime: 999}, - {MinTime: 1000, MaxTime: 1999}, + {minTime: 0, maxTime: 999}, + {minTime: 1000, maxTime: 1999}, } idx := h.indexRange(0, 2000) diff --git a/querier.go b/querier.go index d444f064..a2ef40d5 100644 --- a/querier.go +++ b/querier.go @@ -1070,7 +1070,7 @@ type chunkSeriesIterator struct { intervals tombstones.Intervals } -func newChunkSeriesIterator(cs []chunks.Meta, dranges Intervals, mint, maxt int64) *chunkSeriesIterator { +func newChunkSeriesIterator(cs []chunks.Meta, dranges tombstones.Intervals, mint, maxt int64) *chunkSeriesIterator { csi := &chunkSeriesIterator{ chunks: cs, i: 0, diff --git a/record/internal.go b/record/internal.go deleted file mode 100644 index bec2de49..00000000 --- a/record/internal.go +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright 2017 The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -package record - -import ( - "errors" - - "github.com/prometheus/tsdb/labels" -) - -var ( - // ErrNotFound is returned if a looked up resource was not found. Duplicate ErrNotFound from head.go. - ErrNotFound = errors.New("not found") -) - -// RefSeries is the series labels with the series ID. -type RefSeries struct { - Ref uint64 - Labels labels.Labels -} - -// RefSample is a timestamp/value pair associated with a reference to a series. -type RefSample struct { - Ref uint64 - T int64 - V float64 -} diff --git a/record/record.go b/record/record.go index 5ab75111..2bc2bc4f 100644 --- a/record/record.go +++ b/record/record.go @@ -38,6 +38,24 @@ const ( Tombstones Type = 3 ) +var ( + // ErrNotFound is returned if a looked up resource was not found. Duplicate ErrNotFound from head.go. + ErrNotFound = errors.New("not found") +) + +// RefSeries is the series labels with the series ID. +type RefSeries struct { + Ref uint64 + Labels labels.Labels +} + +// RefSample is a timestamp/value pair associated with a reference to a series. +type RefSample struct { + Ref uint64 + T int64 + V float64 +} + // Decoder decodes series, sample, and tombstone records. // The zero value is ready to use. type Decoder struct { From c392f574b6e43c779c4e7cdd58eb6c03f6421f1d Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 17 Jul 2019 14:40:57 -0700 Subject: [PATCH 13/16] These comments don't need to change. 
Signed-off-by: Callum Styan --- head.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/head.go b/head.go index c309aba3..00296ed2 100644 --- a/head.go +++ b/head.go @@ -392,7 +392,7 @@ func (h *Head) loadWAL(r *wal.Reader, multiRef map[uint64]uint64) (err error) { series, created := h.getOrCreateWithID(s.Ref, s.Labels.Hash(), s.Labels) if !created { - // There's already a different Ref for this series. + // There's already a different ref for this series. multiRefLock.Lock() multiRef[s.Ref] = series.ref multiRefLock.Unlock() @@ -1691,7 +1691,7 @@ func (s *memSeries) chunksMetas() []chunks.Meta { } // reset re-initialises all the variable in the memSeries except 'lset', 'ref', -// and 'chunkRange', like how it would appear after 'newmemSeries(...)'. +// and 'chunkRange', like how it would appear after 'newMemSeries(...)'. func (s *memSeries) reset() { s.chunks = nil s.headChunk = nil @@ -1702,7 +1702,7 @@ func (s *memSeries) reset() { s.app = nil } -// Appendable checks whether the given sample is valid for appending to the series. +// appendable checks whether the given sample is valid for appending to the series. func (s *memSeries) appendable(t int64, v float64) error { c := s.head() if c == nil { @@ -1735,7 +1735,7 @@ func (s *memSeries) chunkID(pos int) int { return pos + s.firstChunkID } -// TruncateChunksBefore removes all chunks from the series that have not timestamp +// truncateChunksBefore removes all chunks from the series that have not timestamp // at or after mint. Chunk IDs remain unchanged. func (s *memSeries) truncateChunksBefore(mint int64) (removed int) { var k int @@ -1756,7 +1756,7 @@ func (s *memSeries) truncateChunksBefore(mint int64) (removed int) { return k } -// Append adds the sample (t, v) to the series. +// append adds the sample (t, v) to the series. 
func (s *memSeries) append(t int64, v float64) (success, chunkCreated bool) { // Based on Gorilla white papers this offers near-optimal compression ratio // so anything bigger that this has diminishing returns and increases From 5264c568139f92896220d83f69ba297a6b6f2918 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Thu, 18 Jul 2019 06:13:25 -0700 Subject: [PATCH 14/16] Update WAL watcher from Prometheus repo, pass metrics structs around for the live reader, expose WriteTo interface. Signed-off-by: Callum Styan --- wal/watcher.go | 77 +++-- wal/watcher_test.go | 729 +++++++++++++++++++++++--------------------- 2 files changed, 415 insertions(+), 391 deletions(-) diff --git a/wal/watcher.go b/wal/watcher.go index b3732aab..c29fdc28 100644 --- a/wal/watcher.go +++ b/wal/watcher.go @@ -39,45 +39,34 @@ const ( consumer = "consumer" ) -type watcherMetrics struct { - recordsRead *prometheus.CounterVec - recordDecodeFails *prometheus.CounterVec - samplesSentPreTailing *prometheus.CounterVec - currentSegment *prometheus.GaugeVec -} - -var ( - lrMetrics = NewLiveReaderMetrics(prometheus.DefaultRegisterer) -) - +// fromTime returns a new millisecond timestamp from a time. // This function is copied from prometheus/prometheus/pkg/timestamp to avoid adding vendor to TSDB repo. - -// FromTime returns a new millisecond timestamp from a time. 
-func FromTime(t time.Time) int64 { +func fromTime(t time.Time) int64 { return t.Unix()*1000 + int64(t.Nanosecond())/int64(time.Millisecond) } -// func init() { -// prometheus.MustRegister(watcherRecordsRead) -// prometheus.MustRegister(watcherRecordDecodeFails) -// prometheus.MustRegister(watcherSamplesSentPreTailing) -// prometheus.MustRegister(watcherCurrentSegment) -// } - -type writeTo interface { +type WriteTo interface { Append([]record.RefSample) bool StoreSeries([]record.RefSeries, int) SeriesReset(int) } +type watcherMetrics struct { + recordsRead *prometheus.CounterVec + recordDecodeFails *prometheus.CounterVec + samplesSentPreTailing *prometheus.CounterVec + currentSegment *prometheus.GaugeVec +} + // Watcher watches the TSDB WAL for a given WriteTo. type Watcher struct { name string - writer writeTo + writer WriteTo logger log.Logger walDir string lastCheckpoint string metrics *watcherMetrics + readerMetrics *liveReaderMetrics startTime int64 @@ -144,22 +133,24 @@ func NewWatcherMetrics(reg prometheus.Registerer) *watcherMetrics { } // NewWatcher creates a new WAL watcher for a given WriteTo. -func NewWatcher(logger log.Logger, metrics *watcherMetrics, name string, writer writeTo, walDir string) *Watcher { +func NewWatcher(reg prometheus.Registerer, logger log.Logger, name string, writer WriteTo, walDir string) *Watcher { if logger == nil { logger = log.NewNopLogger() } - return &Watcher{ - logger: logger, - metrics: metrics, - writer: writer, - walDir: path.Join(walDir, "wal"), - name: name, - quit: make(chan struct{}), - done: make(chan struct{}), + w := Watcher{ + logger: logger, + writer: writer, + metrics: NewWatcherMetrics(reg), + readerMetrics: NewLiveReaderMetrics(reg), + walDir: path.Join(walDir, "wal"), + name: name, + quit: make(chan struct{}), + done: make(chan struct{}), maxSegment: -1, } + return &w } func (w *Watcher) setMetrics() { @@ -175,7 +166,7 @@ func (w *Watcher) setMetrics() { // Start the Watcher. 
func (w *Watcher) Start() { w.setMetrics() - level.Info(w.logger).Log("msg", "starting WAL watcher", "queue", w.name) + level.Info(w.logger).Log("msg", "starting WAL watcher", "consumer", w.name) go w.loop() } @@ -200,7 +191,7 @@ func (w *Watcher) loop() { // We may encourter failures processing the WAL; we should wait and retry. for !isClosed(w.quit) { - w.startTime = FromTime(time.Now()) + w.startTime = fromTime(time.Now()) if err := w.run(); err != nil { level.Error(w.logger).Log("msg", "error tailing WAL", "err", err) } @@ -263,7 +254,7 @@ func (w *Watcher) run() error { func (w *Watcher) findSegmentForIndex(index int) (int, error) { refs, err := w.segments(w.walDir) if err != nil { - return -1, nil + return -1, err } for _, r := range refs { @@ -278,7 +269,7 @@ func (w *Watcher) findSegmentForIndex(index int) (int, error) { func (w *Watcher) firstAndLast() (int, int, error) { refs, err := w.segments(w.walDir) if err != nil { - return -1, -1, nil + return -1, -1, err } if len(refs) == 0 { @@ -323,7 +314,7 @@ func (w *Watcher) watch(segmentNum int, tail bool) error { } defer segment.Close() - reader := NewLiveReader(w.logger, lrMetrics, segment) + reader := NewLiveReader(w.logger, w.readerMetrics, segment) readTicker := time.NewTicker(readPeriod) defer readTicker.Stop() @@ -448,11 +439,12 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { dec record.Decoder series []record.RefSeries samples []record.RefSample + send []record.RefSample ) for r.Next() && !isClosed(w.quit) { rec := r.Record() - w.recordsReadMetric.WithLabelValues(Type(dec.Type(rec))).Inc() + w.recordsReadMetric.WithLabelValues(recordType(dec.Type(rec))).Inc() switch dec.Type(rec) { case record.Series: @@ -474,7 +466,6 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { w.recordDecodeFailsMetric.Inc() return err } - var send []record.RefSample for _, s := range samples { if s.T > w.startTime { send = append(send, s) @@ -483,6 +474,7 @@ 
func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { if len(send) > 0 { // Blocks until the sample is sent to all remote write endpoints or closed (because enqueue blocks). w.writer.Append(send) + send = send[:0] } case record.Tombstones: @@ -498,7 +490,7 @@ func (w *Watcher) readSegment(r *LiveReader, segmentNum int, tail bool) error { return r.Err() } -func Type(rt record.Type) string { +func recordType(rt record.Type) string { switch rt { case record.Invalid: return "invalid" @@ -538,7 +530,7 @@ func (w *Watcher) readCheckpoint(checkpointDir string) error { } defer sr.Close() - r := NewLiveReader(w.logger, lrMetrics, sr) + r := NewLiveReader(w.logger, w.readerMetrics, sr) if err := w.readSegment(r, index, false); err != io.EOF && err != nil { return errors.Wrap(err, "readSegment") } @@ -554,7 +546,8 @@ func (w *Watcher) readCheckpoint(checkpointDir string) error { func checkpointNum(dir string) (int, error) { // Checkpoint dir names are in the format checkpoint.000001 - chunks := strings.Split(dir, ".") + // dir may contain a hidden directory, so only check the base directory + chunks := strings.Split(path.Base(dir), ".") if len(chunks) != 2 { return 0, errors.Errorf("invalid checkpoint dir string: %s", dir) } diff --git a/wal/watcher_test.go b/wal/watcher_test.go index ab80db85..216793a6 100644 --- a/wal/watcher_test.go +++ b/wal/watcher_test.go @@ -92,71 +92,75 @@ func TestTailSamples(t *testing.T) { pageSize := 32 * 1024 const seriesCount = 10 const samplesCount = 250 - now := time.Now() - - dir, err := ioutil.TempDir("", "readCheckpoint") - testutil.Ok(t, err) - defer os.RemoveAll(dir) - - wdir := path.Join(dir, "wal") - err = os.Mkdir(wdir, 0777) - testutil.Ok(t, err) - - enc := record.Encoder{} - w, err := NewSize(nil, prometheus.DefaultRegisterer, wdir, 128*pageSize, false) - testutil.Ok(t, err) - - // Write to the initial segment then checkpoint. 
- for i := 0; i < seriesCount; i++ { - ref := i + 100 - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(ref), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, - }, - }, nil) - testutil.Ok(t, w.Log(series)) - - for j := 0; j < samplesCount; j++ { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(inner), - T: int64(now.UnixNano()) + 1, - V: float64(i), - }, - }, nil) - testutil.Ok(t, w.Log(sample)) - } - } + for _, compress := range []bool{false, true} { + t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { + now := time.Now() + + dir, err := ioutil.TempDir("", "readCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.Encoder{} + w, err := NewSize(nil, nil, wdir, 128*pageSize, compress) + testutil.Ok(t, err) + + // Write to the initial segment then checkpoint. + for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(now.UnixNano()) + 1, + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } - // Start read after checkpoint, no more data written. - first, last, err := w.Segments() - testutil.Ok(t, err) + // Start read after checkpoint, no more data written. 
+ first, last, err := w.Segments() + testutil.Ok(t, err) - wt := newWriteToMock() - watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) - watcher.startTime = now.UnixNano() + wt := newWriteToMock() + watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher.startTime = now.UnixNano() - // Set the Watcher's metrics so they're not nil pointers. - watcher.setMetrics() - for i := first; i <= last; i++ { - segment, err := OpenReadSegment(SegmentName(watcher.walDir, i)) - testutil.Ok(t, err) - defer segment.Close() + // Set the Watcher's metrics so they're not nil pointers. + watcher.setMetrics() + for i := first; i <= last; i++ { + segment, err := OpenReadSegment(SegmentName(watcher.walDir, i)) + testutil.Ok(t, err) + defer segment.Close() - reader := NewLiveReader(nil, nil, segment) - // Use tail true so we can ensure we got the right number of samples. - watcher.readSegment(reader, i, true) - } + reader := NewLiveReader(nil, watcher.readerMetrics, segment) + // Use tail true so we can ensure we got the right number of samples. 
+ watcher.readSegment(reader, i, true) + } - expectedSeries := seriesCount - expectedSamples := seriesCount * samplesCount - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumLabels() >= expectedSeries - }) - testutil.Equals(t, expectedSeries, wt.checkNumLabels()) - testutil.Equals(t, expectedSamples, wt.samplesAppended) + expectedSeries := seriesCount + expectedSamples := seriesCount * samplesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expectedSeries + }) + testutil.Equals(t, expectedSeries, wt.checkNumLabels()) + testutil.Equals(t, expectedSamples, wt.samplesAppended) + }) + } } func TestReadToEndNoCheckpoint(t *testing.T) { @@ -164,61 +168,65 @@ func TestReadToEndNoCheckpoint(t *testing.T) { const seriesCount = 10 const samplesCount = 250 - dir, err := ioutil.TempDir("", "readToEnd_noCheckpoint") - testutil.Ok(t, err) - defer os.RemoveAll(dir) - wdir := path.Join(dir, "wal") - err = os.Mkdir(wdir, 0777) - testutil.Ok(t, err) - - w, err := NewSize(nil, nil, wdir, 128*pageSize, false) - testutil.Ok(t, err) - - var recs [][]byte - - enc := record.Encoder{} - - for i := 0; i < seriesCount; i++ { - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(i), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, - }, - }, nil) - recs = append(recs, series) - for j := 0; j < samplesCount; j++ { - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(j), - T: int64(i), - V: float64(i), - }, - }, nil) - - recs = append(recs, sample) - - // Randomly batch up records. 
- if rand.Intn(4) < 3 { - testutil.Ok(t, w.Log(recs...)) - recs = recs[:0] - } - } - } - testutil.Ok(t, w.Log(recs...)) + for _, compress := range []bool{false, true} { + t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { + dir, err := ioutil.TempDir("", "readToEnd_noCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + w, err := NewSize(nil, nil, wdir, 128*pageSize, compress) + testutil.Ok(t, err) - _, _, err = w.Segments() - testutil.Ok(t, err) + var recs [][]byte - wt := newWriteToMock() - watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) - go watcher.Start() + enc := record.Encoder{} - expected := seriesCount - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumLabels() >= expected - }) - watcher.Stop() - testutil.Equals(t, expected, wt.checkNumLabels()) + for i := 0; i < seriesCount; i++ { + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(i), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + recs = append(recs, series) + for j := 0; j < samplesCount; j++ { + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(j), + T: int64(i), + V: float64(i), + }, + }, nil) + + recs = append(recs, sample) + + // Randomly batch up records. 
+ if rand.Intn(4) < 3 { + testutil.Ok(t, w.Log(recs...)) + recs = recs[:0] + } + } + } + testutil.Ok(t, w.Log(recs...)) + + _, _, err = w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + go watcher.Start() + + expected := seriesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expected + }) + watcher.Stop() + testutil.Equals(t, expected, wt.checkNumLabels()) + }) + } } func TestReadToEndWithCheckpoint(t *testing.T) { @@ -228,79 +236,83 @@ func TestReadToEndWithCheckpoint(t *testing.T) { const seriesCount = 10 const samplesCount = 250 - dir, err := ioutil.TempDir("", "readToEnd_withCheckpoint") - testutil.Ok(t, err) - defer os.RemoveAll(dir) - - wdir := path.Join(dir, "wal") - err = os.Mkdir(wdir, 0777) - testutil.Ok(t, err) - - enc := record.Encoder{} - w, err := NewSize(nil, nil, wdir, segmentSize, false) - testutil.Ok(t, err) - - // Write to the initial segment then checkpoint. 
- for i := 0; i < seriesCount; i++ { - ref := i + 100 - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(ref), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, - }, - }, nil) - testutil.Ok(t, w.Log(series)) - - for j := 0; j < samplesCount; j++ { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(inner), - T: int64(i), - V: float64(i), - }, - }, nil) - testutil.Ok(t, w.Log(sample)) - } - } + for _, compress := range []bool{false, true} { + t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { + dir, err := ioutil.TempDir("", "readToEnd_withCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.Encoder{} + w, err := NewSize(nil, nil, wdir, segmentSize, compress) + testutil.Ok(t, err) + + // Write to the initial segment then checkpoint. + for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } - Checkpoint(w, 0, 1, func(x uint64) bool { return true }, 0) - w.Truncate(1) - - // Write more records after checkpointing. 
- for i := 0; i < seriesCount; i++ { - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(i), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, - }, - }, nil) - testutil.Ok(t, w.Log(series)) - - for j := 0; j < samplesCount; j++ { - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(j), - T: int64(i), - V: float64(i), - }, - }, nil) - testutil.Ok(t, w.Log(sample)) - } - } + Checkpoint(w, 0, 1, func(x uint64) bool { return true }, 0) + w.Truncate(1) + + // Write more records after checkpointing. + for i := 0; i < seriesCount; i++ { + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(i), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(j), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } - _, _, err = w.Segments() - testutil.Ok(t, err) - wt := newWriteToMock() - watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) - go watcher.Start() - - expected := seriesCount * 2 - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumLabels() >= expected - }) - watcher.Stop() - testutil.Equals(t, expected, wt.checkNumLabels()) + _, _, err = w.Segments() + testutil.Ok(t, err) + wt := newWriteToMock() + watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + go watcher.Start() + + expected := seriesCount * 2 + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expected + }) + watcher.Stop() + testutil.Equals(t, expected, wt.checkNumLabels()) + }) + } } func TestReadCheckpoint(t *testing.T) { @@ -308,61 +320,65 @@ func TestReadCheckpoint(t *testing.T) { const seriesCount = 10 const samplesCount = 250 - dir, 
err := ioutil.TempDir("", "readCheckpoint") - testutil.Ok(t, err) - defer os.RemoveAll(dir) - - wdir := path.Join(dir, "wal") - err = os.Mkdir(wdir, 0777) - testutil.Ok(t, err) - - os.Create(SegmentName(wdir, 30)) - - enc := record.Encoder{} - w, err := NewSize(nil, nil, wdir, 128*pageSize, false) - testutil.Ok(t, err) - - // Write to the initial segment then checkpoint. - for i := 0; i < seriesCount; i++ { - ref := i + 100 - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(ref), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, - }, - }, nil) - testutil.Ok(t, w.Log(series)) - - for j := 0; j < samplesCount; j++ { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(inner), - T: int64(i), - V: float64(i), - }, - }, nil) - testutil.Ok(t, w.Log(sample)) - } + for _, compress := range []bool{false, true} { + t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { + dir, err := ioutil.TempDir("", "readCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + os.Create(SegmentName(wdir, 30)) + + enc := record.Encoder{} + w, err := NewSize(nil, nil, wdir, 128*pageSize, compress) + testutil.Ok(t, err) + + // Write to the initial segment then checkpoint. 
+ for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } + Checkpoint(w, 30, 31, func(x uint64) bool { return true }, 0) + w.Truncate(32) + + // Start read after checkpoint, no more data written. + _, _, err = w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + // watcher. + go watcher.Start() + + expectedSeries := seriesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expectedSeries + }) + watcher.Stop() + testutil.Equals(t, expectedSeries, wt.checkNumLabels()) + }) } - Checkpoint(w, 30, 31, func(x uint64) bool { return true }, 0) - w.Truncate(32) - - // Start read after checkpoint, no more data written. - _, _, err = w.Segments() - testutil.Ok(t, err) - - wt := newWriteToMock() - watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) - // watcher. 
- go watcher.Start() - - expectedSeries := seriesCount - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumLabels() >= expectedSeries - }) - watcher.Stop() - testutil.Equals(t, expectedSeries, wt.checkNumLabels()) } func TestReadCheckpointMultipleSegments(t *testing.T) { @@ -372,60 +388,64 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { const seriesCount = 20 const samplesCount = 300 - dir, err := ioutil.TempDir("", "readCheckpoint") - testutil.Ok(t, err) - defer os.RemoveAll(dir) - - wdir := path.Join(dir, "wal") - err = os.Mkdir(wdir, 0777) - testutil.Ok(t, err) - - enc := record.Encoder{} - w, err := NewSize(nil, nil, wdir, pageSize, false) - testutil.Ok(t, err) - - // Write a bunch of data. - for i := 0; i < segments; i++ { - for j := 0; j < seriesCount; j++ { - ref := j + (i * 100) - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(ref), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", j)}}, - }, - }, nil) - testutil.Ok(t, w.Log(series)) - - for k := 0; k < samplesCount; k++ { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(inner), - T: int64(i), - V: float64(i), - }, - }, nil) - testutil.Ok(t, w.Log(sample)) + for _, compress := range []bool{false, true} { + t.Run(fmt.Sprintf("compress=%t", compress), func(t *testing.T) { + dir, err := ioutil.TempDir("", "readCheckpoint") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 0777) + testutil.Ok(t, err) + + enc := record.Encoder{} + w, err := NewSize(nil, nil, wdir, pageSize, compress) + testutil.Ok(t, err) + + // Write a bunch of data. 
+ for i := 0; i < segments; i++ { + for j := 0; j < seriesCount; j++ { + ref := j + (i * 100) + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", j)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for k := 0; k < samplesCount; k++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } } - } - } - Checkpoint(w, 0, 4, func(id uint64) bool { - return true - }, 0) + Checkpoint(w, 0, 4, func(id uint64) bool { + return true + }, 0) - wt := newWriteToMock() - watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) - watcher.maxSegment = -1 + wt := newWriteToMock() + watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher.maxSegment = -1 - // Set the Watcher's metrics so they're not nil pointers. - watcher.setMetrics() + // Set the Watcher's metrics so they're not nil pointers. + watcher.setMetrics() - lastCheckpoint, _, err := LastCheckpoint(watcher.walDir) - testutil.Ok(t, err) + lastCheckpoint, _, err := LastCheckpoint(watcher.walDir) + testutil.Ok(t, err) - err = watcher.readCheckpoint(lastCheckpoint) - testutil.Ok(t, err) + err = watcher.readCheckpoint(lastCheckpoint) + testutil.Ok(t, err) + }) + } } func TestCheckpointSeriesReset(t *testing.T) { @@ -434,71 +454,82 @@ func TestCheckpointSeriesReset(t *testing.T) { // in order to get enough segments for us to checkpoint. 
const seriesCount = 20 const samplesCount = 350 - - dir, err := ioutil.TempDir("", "seriesReset") - testutil.Ok(t, err) - defer os.RemoveAll(dir) - - wdir := path.Join(dir, "wal") - err = os.Mkdir(wdir, 0777) - testutil.Ok(t, err) - - enc := record.Encoder{} - w, err := NewSize(nil, nil, wdir, segmentSize, false) - testutil.Ok(t, err) - - // Write to the initial segment, then checkpoint later. - for i := 0; i < seriesCount; i++ { - ref := i + 100 - series := enc.Series([]record.RefSeries{ - record.RefSeries{ - Ref: uint64(ref), - Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, - }, - }, nil) - testutil.Ok(t, w.Log(series)) - - for j := 0; j < samplesCount; j++ { - inner := rand.Intn(ref + 1) - sample := enc.Samples([]record.RefSample{ - record.RefSample{ - Ref: uint64(inner), - T: int64(i), - V: float64(i), - }, - }, nil) - testutil.Ok(t, w.Log(sample)) - } + testCases := []struct { + compress bool + segments int + }{ + {compress: false, segments: 14}, + {compress: true, segments: 13}, } - _, _, err = w.Segments() - testutil.Ok(t, err) - - wt := newWriteToMock() - watcher := NewWatcher(nil, NewWatcherMetrics(prometheus.DefaultRegisterer), "", wt, dir) - watcher.maxSegment = -1 - go watcher.Start() - - expected := seriesCount - retry(t, defaultRetryInterval, defaultRetries, func() bool { - return wt.checkNumLabels() >= expected - }) - testutil.Equals(t, seriesCount, wt.checkNumLabels()) - - _, err = Checkpoint(w, 2, 4, func(x uint64) bool { return true }, 0) - testutil.Ok(t, err) - - err = w.Truncate(5) - testutil.Ok(t, err) - - _, cpi, err := LastCheckpoint(path.Join(dir, "wal")) - testutil.Ok(t, err) - err = watcher.garbageCollectSeries(cpi + 1) - testutil.Ok(t, err) + for _, tc := range testCases { + t.Run(fmt.Sprintf("compress=%t", tc.compress), func(t *testing.T) { + dir, err := ioutil.TempDir("", "seriesReset") + testutil.Ok(t, err) + defer os.RemoveAll(dir) + + wdir := path.Join(dir, "wal") + err = os.Mkdir(wdir, 
0777) + testutil.Ok(t, err) + + enc := record.Encoder{} + w, err := NewSize(nil, nil, wdir, segmentSize, tc.compress) + testutil.Ok(t, err) + + // Write to the initial segment, then checkpoint later. + for i := 0; i < seriesCount; i++ { + ref := i + 100 + series := enc.Series([]record.RefSeries{ + record.RefSeries{ + Ref: uint64(ref), + Labels: labels.Labels{labels.Label{Name: "__name__", Value: fmt.Sprintf("metric_%d", i)}}, + }, + }, nil) + testutil.Ok(t, w.Log(series)) + + for j := 0; j < samplesCount; j++ { + inner := rand.Intn(ref + 1) + sample := enc.Samples([]record.RefSample{ + record.RefSample{ + Ref: uint64(inner), + T: int64(i), + V: float64(i), + }, + }, nil) + testutil.Ok(t, w.Log(sample)) + } + } - watcher.Stop() - // If you modify the checkpoint and truncate segment #'s run the test to see how - // many series records you end up with and change the last Equals check accordingly - // or modify the Equals to Assert(len(wt.seriesLabels) < seriesCount*10) - testutil.Equals(t, 14, wt.checkNumLabels()) + _, _, err = w.Segments() + testutil.Ok(t, err) + + wt := newWriteToMock() + watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher.maxSegment = -1 + go watcher.Start() + + expected := seriesCount + retry(t, defaultRetryInterval, defaultRetries, func() bool { + return wt.checkNumLabels() >= expected + }) + testutil.Equals(t, seriesCount, wt.checkNumLabels()) + + _, err = Checkpoint(w, 2, 4, func(x uint64) bool { return true }, 0) + testutil.Ok(t, err) + + err = w.Truncate(5) + testutil.Ok(t, err) + + _, cpi, err := LastCheckpoint(path.Join(dir, "wal")) + testutil.Ok(t, err) + err = watcher.garbageCollectSeries(cpi + 1) + testutil.Ok(t, err) + + watcher.Stop() + // If you modify the checkpoint and truncate segment #'s run the test to see how + // many series records you end up with and change the last Equals check accordingly + // or modify the Equals to Assert(len(wt.seriesLabels) < seriesCount*10) + testutil.Equals(t, tc.segments, 
wt.checkNumLabels()) + }) + } } From 8c40bb9b4abab5513d9909c0262a3cc7aeb86d18 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Mon, 29 Jul 2019 11:11:17 -0700 Subject: [PATCH 15/16] Don't export MemTombstones. Signed-off-by: Callum Styan --- compact_test.go | 10 ++++++---- mocks_test.go | 10 ++++++---- querier_test.go | 11 ++++++----- tombstones/tombstones.go | 24 ++++++++++++------------ 4 files changed, 30 insertions(+), 25 deletions(-) diff --git a/compact_test.go b/compact_test.go index fe61da24..2e85115b 100644 --- a/compact_test.go +++ b/compact_test.go @@ -456,10 +456,12 @@ func metaRange(name string, mint, maxt int64, stats *BlockStats) dirMeta { type erringBReader struct{} -func (erringBReader) Index() (IndexReader, error) { return nil, errors.New("index") } -func (erringBReader) Chunks() (ChunkReader, error) { return nil, errors.New("chunks") } -func (erringBReader) Tombstones() (TombstoneReader, error) { return nil, errors.New("tombstones") } -func (erringBReader) Meta() BlockMeta { return BlockMeta{} } +func (erringBReader) Index() (IndexReader, error) { return nil, errors.New("index") } +func (erringBReader) Chunks() (ChunkReader, error) { return nil, errors.New("chunks") } +func (erringBReader) Tombstones() (tombstones.TombstoneReader, error) { + return nil, errors.New("tombstones") +} +func (erringBReader) Meta() BlockMeta { return BlockMeta{} } type nopChunkWriter struct{} diff --git a/mocks_test.go b/mocks_test.go index 44d8e9a5..c5bc0f6b 100644 --- a/mocks_test.go +++ b/mocks_test.go @@ -73,7 +73,9 @@ type mockBReader struct { maxt int64 } -func (r *mockBReader) Index() (IndexReader, error) { return r.ir, nil } -func (r *mockBReader) Chunks() (ChunkReader, error) { return r.cr, nil } -func (r *mockBReader) Tombstones() (TombstoneReader, error) { return newMemTombstones(), nil } -func (r *mockBReader) Meta() BlockMeta { return BlockMeta{MinTime: r.mint, MaxTime: r.maxt} } +func (r *mockBReader) Index() (IndexReader, error) { return r.ir, nil } 
+func (r *mockBReader) Chunks() (ChunkReader, error) { return r.cr, nil } +func (r *mockBReader) Tombstones() (tombstones.TombstoneReader, error) { + return tombstones.NewMemTombstones(), nil +} +func (r *mockBReader) Meta() BlockMeta { return BlockMeta{MinTime: r.mint, MaxTime: r.maxt} } diff --git a/querier_test.go b/querier_test.go index 2794e0e6..cbbfcdab 100644 --- a/querier_test.go +++ b/querier_test.go @@ -413,6 +413,11 @@ func TestBlockQuerierDelete(t *testing.T) { exp SeriesSet } + tstones := tombstones.NewMemTombstones() + tstones.AddInterval(1, tombstones.Interval{1, 3}) + tstones.AddInterval(2, tombstones.Interval{1, 3}, tombstones.Interval{6, 10}) + tstones.AddInterval(3, tombstones.Interval{6, 10}) + cases := struct { data []seriesSamples @@ -461,11 +466,7 @@ func TestBlockQuerierDelete(t *testing.T) { }, }, }, - tombstones: &tombstones.MemTombstones{IntvlGroups: map[uint64]tombstones.Intervals{ - 1: tombstones.Intervals{{1, 3}}, - 2: tombstones.Intervals{{1, 3}, {6, 10}}, - 3: tombstones.Intervals{{6, 10}}, - }}, + tombstones: tstones, queries: []query{ { mint: 2, diff --git a/tombstones/tombstones.go b/tombstones/tombstones.go index 8b79632b..e82bccb0 100644 --- a/tombstones/tombstones.go +++ b/tombstones/tombstones.go @@ -199,27 +199,27 @@ func ReadTombstones(dir string) (TombstoneReader, int64, error) { return stonesMap, int64(len(b)), nil } -type MemTombstones struct { - IntvlGroups map[uint64]Intervals +type memTombstones struct { + intvlGroups map[uint64]Intervals mtx sync.RWMutex } // NewMemTombstones creates new in memory TombstoneReader // that allows adding new intervals. 
-func NewMemTombstones() *MemTombstones { - return &MemTombstones{IntvlGroups: make(map[uint64]Intervals)} +func NewMemTombstones() *memTombstones { + return &memTombstones{intvlGroups: make(map[uint64]Intervals)} } -func (t *MemTombstones) Get(ref uint64) (Intervals, error) { +func (t *memTombstones) Get(ref uint64) (Intervals, error) { t.mtx.RLock() defer t.mtx.RUnlock() - return t.IntvlGroups[ref], nil + return t.intvlGroups[ref], nil } -func (t *MemTombstones) Iter(f func(uint64, Intervals) error) error { +func (t *memTombstones) Iter(f func(uint64, Intervals) error) error { t.mtx.RLock() defer t.mtx.RUnlock() - for ref, ivs := range t.IntvlGroups { + for ref, ivs := range t.intvlGroups { if err := f(ref, ivs); err != nil { return err } @@ -227,23 +227,23 @@ func (t *MemTombstones) Iter(f func(uint64, Intervals) error) error { return nil } -func (t *MemTombstones) Total() uint64 { +func (t *memTombstones) Total() uint64 { t.mtx.RLock() defer t.mtx.RUnlock() total := uint64(0) - for _, ivs := range t.IntvlGroups { + for _, ivs := range t.intvlGroups { total += uint64(len(ivs)) } return total } // AddInterval to an existing MemTombstones -func (t *MemTombstones) AddInterval(ref uint64, itvs ...Interval) { +func (t *memTombstones) AddInterval(ref uint64, itvs ...Interval) { t.mtx.Lock() defer t.mtx.Unlock() for _, itv := range itvs { - t.IntvlGroups[ref] = t.IntvlGroups[ref].Add(itv) + t.intvlGroups[ref] = t.intvlGroups[ref].Add(itv) } } From 3b20c644aaa1769f149b3d7929ae65f2cb09a669 Mon Sep 17 00:00:00 2001 From: Callum Styan Date: Wed, 7 Aug 2019 15:42:45 -0700 Subject: [PATCH 16/16] Export WatcherMetrics struct and pass the struct to NewWatcher rather than creating and registering in the constructor. 
Signed-off-by: Callum Styan --- wal/watcher.go | 20 ++++++++++---------- wal/watcher_test.go | 13 +++++++------ 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/wal/watcher.go b/wal/watcher.go index c29fdc28..fb6e3c64 100644 --- a/wal/watcher.go +++ b/wal/watcher.go @@ -51,7 +51,7 @@ type WriteTo interface { SeriesReset(int) } -type watcherMetrics struct { +type WatcherMetrics struct { recordsRead *prometheus.CounterVec recordDecodeFails *prometheus.CounterVec samplesSentPreTailing *prometheus.CounterVec @@ -65,7 +65,7 @@ type Watcher struct { logger log.Logger walDir string lastCheckpoint string - metrics *watcherMetrics + metrics *WatcherMetrics readerMetrics *liveReaderMetrics startTime int64 @@ -82,8 +82,8 @@ type Watcher struct { maxSegment int } -func NewWatcherMetrics(reg prometheus.Registerer) *watcherMetrics { - m := &watcherMetrics{ +func NewWatcherMetrics(reg prometheus.Registerer) *WatcherMetrics { + m := &WatcherMetrics{ recordsRead: prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "prometheus", @@ -123,17 +123,17 @@ func NewWatcherMetrics(reg prometheus.Registerer) *watcherMetrics { } if reg != nil { - reg.Register(m.recordsRead) - reg.Register(m.recordDecodeFails) - reg.Register(m.samplesSentPreTailing) - reg.Register(m.currentSegment) + reg.MustRegister(m.recordsRead) + reg.MustRegister(m.recordDecodeFails) + reg.MustRegister(m.samplesSentPreTailing) + reg.MustRegister(m.currentSegment) } return m } // NewWatcher creates a new WAL watcher for a given WriteTo. 
-func NewWatcher(reg prometheus.Registerer, logger log.Logger, name string, writer WriteTo, walDir string) *Watcher { +func NewWatcher(reg prometheus.Registerer, metrics *WatcherMetrics, logger log.Logger, name string, writer WriteTo, walDir string) *Watcher { if logger == nil { logger = log.NewNopLogger() } @@ -141,7 +141,7 @@ func NewWatcher(reg prometheus.Registerer, logger log.Logger, name string, write w := Watcher{ logger: logger, writer: writer, - metrics: NewWatcherMetrics(reg), + metrics: metrics, readerMetrics: NewLiveReaderMetrics(reg), walDir: path.Join(walDir, "wal"), name: name, diff --git a/wal/watcher_test.go b/wal/watcher_test.go index 216793a6..e559464f 100644 --- a/wal/watcher_test.go +++ b/wal/watcher_test.go @@ -30,6 +30,7 @@ import ( var defaultRetryInterval = 100 * time.Millisecond var defaultRetries = 100 +var wMetrics = NewWatcherMetrics(prometheus.DefaultRegisterer) // retry executes f() n times at each interval until it returns true. func retry(t *testing.T, interval time.Duration, n int, f func() bool) { @@ -137,7 +138,7 @@ func TestTailSamples(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher := NewWatcher(prometheus.DefaultRegisterer, wMetrics, nil, "", wt, dir) watcher.startTime = now.UnixNano() // Set the Watcher's metrics so they're not nil pointers. 
@@ -216,7 +217,7 @@ func TestReadToEndNoCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher := NewWatcher(prometheus.DefaultRegisterer, wMetrics, nil, "", wt, dir) go watcher.Start() expected := seriesCount @@ -302,7 +303,7 @@ func TestReadToEndWithCheckpoint(t *testing.T) { _, _, err = w.Segments() testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher := NewWatcher(prometheus.DefaultRegisterer, wMetrics, nil, "", wt, dir) go watcher.Start() expected := seriesCount * 2 @@ -367,7 +368,7 @@ func TestReadCheckpoint(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher := NewWatcher(prometheus.DefaultRegisterer, wMetrics, nil, "", wt, dir) // watcher. go watcher.Start() @@ -433,7 +434,7 @@ func TestReadCheckpointMultipleSegments(t *testing.T) { }, 0) wt := newWriteToMock() - watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher := NewWatcher(prometheus.DefaultRegisterer, wMetrics, nil, "", wt, dir) watcher.maxSegment = -1 // Set the Watcher's metrics so they're not nil pointers. @@ -504,7 +505,7 @@ func TestCheckpointSeriesReset(t *testing.T) { testutil.Ok(t, err) wt := newWriteToMock() - watcher := NewWatcher(prometheus.DefaultRegisterer, nil, "", wt, dir) + watcher := NewWatcher(prometheus.DefaultRegisterer, wMetrics, nil, "", wt, dir) watcher.maxSegment = -1 go watcher.Start()