diff --git a/benchplot/plot.go b/benchplot/plot.go index c6f9d05..02af491 100644 --- a/benchplot/plot.go +++ b/benchplot/plot.go @@ -2,6 +2,7 @@ package main import ( "fmt" + "image/color" "math" "github.com/aclements/go-gg/generic/slice" @@ -33,17 +34,14 @@ func plot(t, git table.Grouping, configCols, resultCols []string) (*gg.Plot, int plot.SortBy("commit date") plot.Stat(commitIndex{}) - // Average each result at each commit (but keep columns names - // the same to keep things easier to read). - plot.Stat(ggstat.Agg("commit", "name")(ggstat.AggMean(resultCols...))) - for _, rcol := range resultCols { - plot.SetData(table.Rename(plot.Data(), "mean "+rcol, rcol)) - } - // Unpivot all of the metrics into one column. plot.Stat(convertFloat{resultCols}) plot.SetData(table.Unpivot(plot.Data(), "metric", "result", resultCols...)) - y := "result" + + // Average each result at each commit (but keep columns names + // the same to keep things easier to read). + plot.Stat(ggstat.Agg("commit", "name", "metric", "branch", "commit index")(ggstat.AggMean("result"), ggstat.AggMin("result"), ggstat.AggMax("result"))) + y := "mean result" // Normalize to earliest commit on master. It's important to // do this before the geomean if there are commits missing. @@ -51,33 +49,45 @@ func plot(t, git table.Grouping, configCols, resultCols []string) (*gg.Plot, int // group by name and metric, since the geomean needs to be // done on a different grouping. plot.GroupBy("name", "metric") - plot.Stat(ggstat.Normalize{X: "branch", By: firstMasterIndex, Cols: []string{"result"}}) + plot.Stat(ggstat.Normalize{X: "branch", By: firstMasterIndex, Cols: []string{"mean result", "max result", "min result"}, DenomCols: []string{"mean result", "mean result", "mean result"}}) y = "normalized " + y - plot.SetData(table.Remove(plot.Data(), "result")) + for _, col := range []string{"mean result", "max result", "min result"} { + plot.SetData(table.Remove(plot.Data(), col)) + } plot.SetData(table.Ungroup(table.Ungroup(plot.Data()))) // Compute geomean for each metric at each commit if there's // more than one benchmark. if len(table.GroupBy(t, "name").Tables()) > 1 { gt := removeNaNs(plot.Data(), y) - gt = ggstat.Agg("commit", "metric")(ggstat.AggGeoMean(y)).F(gt) + gt = ggstat.Agg("commit", "metric", "branch", "commit index")(ggstat.AggGeoMean(y), ggstat.AggMin("normalized min result"), ggstat.AggMax("normalized max result")).F(gt) gt = table.MapTables(gt, func(_ table.GroupID, t *table.Table) *table.Table { return table.NewBuilder(t).AddConst("name", " geomean").Done() }) gt = table.Rename(gt, "geomean "+y, y) + gt = table.Rename(gt, "min normalized min result", "normalized min result") + gt = table.Rename(gt, "max normalized max result", "normalized max result") plot.SetData(table.Concat(plot.Data(), gt)) nrows++ } + // Always show Y=0. + plot.SetScale("y", gg.NewLinearScaler().Include(0)) + // Facet by name and metric. - plot.Add(gg.FacetY{Col: "name"}, gg.FacetX{Col: "metric"}) + plot.Add(gg.FacetY{Col: "name"}, gg.FacetX{Col: "metric", SplitYScales: true}) // Filter the data to reduce noise. plot.Stat(kza{y, 15, 3}) y = "filtered " + y - // Always show Y=0. 
- plot.SetScale("y", gg.NewLinearScaler().Include(0)) + plot.Add(gg.LayerArea{ + X: "commit index", + Upper: "normalized max result", + Lower: "normalized min result", + Fill: plot.Const(color.Gray{192}), + //Color: "branch", + }) plot.Add(gg.LayerLines{ X: "commit index", diff --git a/benchplot/vendor/github.com/aclements/go-gg/generic/slice/sort.go b/benchplot/vendor/github.com/aclements/go-gg/generic/slice/sort.go index 7df2d0e..e5ef8b6 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/generic/slice/sort.go +++ b/benchplot/vendor/github.com/aclements/go-gg/generic/slice/sort.go @@ -7,26 +7,29 @@ package slice import ( "reflect" "sort" + "time" "github.com/aclements/go-gg/generic" ) // CanSort returns whether the value v can be sorted. func CanSort(v interface{}) bool { - if _, ok := v.(sort.Interface); ok { + switch v.(type) { + case sort.Interface, []time.Time: return true } return generic.CanOrderR(reflect.TypeOf(v).Elem().Kind()) } -// Sort sorts v in increasing order. v must implement sort.Interface -// or must be a slice whose elements are orderable. +// Sort sorts v in increasing order. v must implement sort.Interface, +// be a slice whose elements are orderable, or be a []time.Time. func Sort(v interface{}) { sort.Sort(Sorter(v)) } // Sorter returns a sort.Interface for sorting v. v must implement -// sort.Interface or must be a slice whose elements are orderable. +// sort.Interface, be a slice whose elements are orderable, or be a +// []time.Time. func Sorter(v interface{}) sort.Interface { switch v := v.(type) { case []int: @@ -35,6 +38,8 @@ func Sorter(v interface{}) sort.Interface { return sort.Float64Slice(v) case []string: return sort.StringSlice(v) + case []time.Time: + return sortTimeSlice(v) case sort.Interface: return v } @@ -124,3 +129,9 @@ func (s sortStringSlice) Swap(i, j int) { s.Index(i).SetString(b) s.Index(j).SetString(a) } + +type sortTimeSlice []time.Time + +func (s sortTimeSlice) Len() int { return len(s) } +func (s sortTimeSlice) Less(i, j int) bool { return s[i].Before(s[j]) } +func (s sortTimeSlice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } diff --git a/benchplot/vendor/github.com/aclements/go-gg/gg/example_scale_test.go b/benchplot/vendor/github.com/aclements/go-gg/gg/example_scale_test.go new file mode 100644 index 0000000..00eaa8b --- /dev/null +++ b/benchplot/vendor/github.com/aclements/go-gg/gg/example_scale_test.go @@ -0,0 +1,59 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package gg + +import ( + "fmt" + "math/rand" + "os" + "time" + + "github.com/aclements/go-gg/table" +) + +func ExampleNewTimeScaler() { + var x []time.Time + var y []float64 + var steps []time.Duration + for _, step := range []time.Duration{ + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + time.Minute, time.Hour, 24 * time.Hour, 7 * 24 * time.Hour, + } { + t := time.Now() + for i := 0; i < 100; i++ { + x = append(x, t) + y = append(y, rand.Float64()-.5) + steps = append(steps, 100*step) + t = t.Add(-step) + } + } + + tb := table.NewBuilder(nil) + tb.Add("x", x).Add("y", y).Add("steps", steps) + + plot := NewPlot(tb.Done()) + + plot.SetScale("x", NewTimeScaler()) + + plot.Add(FacetY{ + Col: "steps", + SplitXScales: true, + }) + + plot.Add(LayerLines{ + X: "x", + Y: "y", + }) + + f, err := os.Create("scale_time.svg") + if err != nil { + panic("unable to create scale_time.svg") + } + defer f.Close() + plot.WriteSVG(f, 800, 1000) + fmt.Println("ok") + // output: + // ok +} diff --git a/benchplot/vendor/github.com/aclements/go-gg/gg/layer.go b/benchplot/vendor/github.com/aclements/go-gg/gg/layer.go index 1e8a150..210bb81 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/gg/layer.go +++ b/benchplot/vendor/github.com/aclements/go-gg/gg/layer.go @@ -145,6 +145,40 @@ func (l LayerPaths) apply(p *Plot, sort bool) { }, p.Data().Tables()}) } +// LayerArea shades the area between two columns with a polygon. It is +// useful in conjunction with ggstat.AggMax and ggstat.AggMin for +// drawing the extents of data. +type LayerArea struct { + // X names the column that defines the input of each point. If + // this is empty, it defaults to the first column. + X string + + // Upper and Lower name columns that define the vertical + // bounds of the shaded area. If either is "", it defaults to + // 0. + Upper, Lower string + + // Fill names a column that defines the fill color of each + // path. If Fill is "", it defaults to none. Otherwise, the + // data is grouped by Fill. + Fill string +} + +func (l LayerArea) Apply(p *Plot) { + defaultCols(p, &l.X) + if l.Fill != "" { + p.GroupBy(l.Fill) + } + defer p.Save().Restore() + p = p.SortBy(l.X) + p.marks = append(p.marks, plotMark{&markArea{ + p.use("x", l.X), + p.use("y", l.Upper), + p.use("y", l.Lower), + p.use("fill", l.Fill), + }, p.Data().Tables()}) +} + // LayerPoints layers a point mark at each data point. type LayerPoints struct { // X and Y name columns that define input and response of each @@ -289,9 +323,11 @@ func (l LayerTooltips) Apply(p *Plot) { // Split up by subplot and flatten each subplot. 
tables := map[*subplot][]*table.Table{} + gids := map[*subplot]table.GroupID{} for _, gid := range p.Data().Tables() { s := subplotOf(gid) tables[s] = append(tables[s], p.Data().Table(gid)) + gids[s] = gid } var ng table.GroupingBuilder for k, ts := range tables { @@ -299,7 +335,9 @@ func (l LayerTooltips) Apply(p *Plot) { for i, t := range ts { subg.Add(table.RootGroupID.Extend(i), t) } - ng.Add(table.RootGroupID.Extend(k), table.Flatten(subg.Done())) + ngid := table.RootGroupID.Extend(k) + ng.Add(ngid, table.Flatten(subg.Done())) + p.copyScales(gids[k], ngid) } p.SetData(ng.Done()) diff --git a/benchplot/vendor/github.com/aclements/go-gg/gg/layout.go b/benchplot/vendor/github.com/aclements/go-gg/gg/layout.go index 51b9c9e..a0406fc 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/gg/layout.go +++ b/benchplot/vendor/github.com/aclements/go-gg/gg/layout.go @@ -122,6 +122,7 @@ type eltTicks struct { type plotEltTicks struct { major table.Slice + minor table.Slice labels []string } @@ -194,8 +195,8 @@ func (e *eltTicks) computeTicks() { // Optimize ticks, keeping labels at least tickDistance apart. e.ticks = make(map[Scaler]plotEltTicks) for s := range e.scales() { - pred := func(ticks []float64, labels []string) bool { - if len(ticks) <= 1 { + pred := func(ticks, _ table.Slice, labels []string) bool { + if len(labels) <= 1 { return true } // Check distance between labels. @@ -220,8 +221,8 @@ func (e *eltTicks) computeTicks() { return true } - major, _, labels := s.Ticks(maxTicks, pred) - e.ticks[s] = plotEltTicks{major, labels} + major, minor, labels := s.Ticks(maxTicks, pred) + e.ticks[s] = plotEltTicks{major, minor, labels} } } diff --git a/benchplot/vendor/github.com/aclements/go-gg/gg/mark.go b/benchplot/vendor/github.com/aclements/go-gg/gg/mark.go index 8e8946d..bcb2d1f 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/gg/mark.go +++ b/benchplot/vendor/github.com/aclements/go-gg/gg/mark.go @@ -59,6 +59,39 @@ func (m *markPath) mark(env *renderEnv, canvas *svg.SVG) { drawPath(canvas, xs, ys, stroke, fill) } +type markArea struct { + x, upper, lower, fill *scaledData +} + +func rev(data []float64) []float64 { + var rev []float64 + for i := len(data) - 1; i >= 0; i-- { + rev = append(rev, data[i]) + } + return rev +} + +func (m *markArea) mark(env *renderEnv, canvas *svg.SVG) { + xs := env.get(m.x).([]float64) + upper := make([]float64, len(xs)) + if m.upper != nil { + upper = env.get(m.upper).([]float64) + } + lower := make([]float64, len(xs)) + if m.lower != nil { + lower = env.get(m.lower).([]float64) + } + var fill color.Color = color.Black + if m.fill != nil { + fill = env.getFirst(m.fill).(color.Color) + } + + xs = append(xs, rev(xs)...) + ys := append(upper, lower...) + + drawPath(canvas, xs, ys, color.Transparent, fill) +} + type markSteps struct { dir StepMode @@ -150,7 +183,7 @@ func drawPath(canvas *svg.SVG, xs, ys []float64, stroke color.Color, fill color. // XXX Stroke width style := cssPaint("stroke", stroke) + ";" + cssPaint("fill", fill) + ";stroke-width:3" - canvas.Path(string(path), style) + canvas.Path(wrapPath(string(path)), style) } type markPoint struct { @@ -206,7 +239,9 @@ func (m *markTiles) mark(env *renderEnv, canvas *svg.SVG) { // are color.Color? How would this work with an identity // scaler? var fills []color.Color - slice.Convert(&fills, env.get(m.fill)) + if m.fill != nil { + slice.Convert(&fills, env.get(m.fill)) + } // TODO: We can't use an this if the width and height // are specified, or if there is a stroke. 
@@ -275,11 +310,15 @@ func (m *markTiles) mark(env *renderEnv, canvas *svg.SVG) { // Create the image. iw, ih := round((xmax-xmin+xgap)/xgap), round((ymax-ymin+ygap)/ygap) img := image.NewRGBA(image.Rect(0, 0, iw, ih)) + fill := color.Color(color.Black) for i := range xs { if !isFinite(xs[i]) || !isFinite(ys[i]) { continue } - img.Set(round((xs[i]-xmin)/xgap), round((ys[i]-ymin)/ygap), fills[i]) + if fills != nil { + fill = fills[i] + } + img.Set(round((xs[i]-xmin)/xgap), round((ys[i]-ymin)/ygap), fill) } // Encode the image. @@ -405,12 +444,12 @@ function tooltipMove(evt, data, tid, minx, maxx) { var pt = svg.createSVGPoint(); pt.x = evt.clientX; pt.y = evt.clientY; - var ex = pt.matrixTransform(svg.getScreenCTM().inverse()).x; + var epos = pt.matrixTransform(svg.getScreenCTM().inverse()); // Find data point closest to event coordinate. - var cd = Math.abs(ex-data.x[0]), ci = 0; + var cd = Math.sqrt(Math.pow(epos.x-data.x[0], 2) + Math.pow(epos.y-data.y[0], 2)), ci = 0; for (var i = 1; i < data.x.length; i++) { - var d = Math.abs(ex-data.x[i]); + var d = Math.sqrt(Math.pow(epos.x-data.x[i], 2) + Math.pow(epos.y-data.y[i], 2)); if (d < cd) { cd = d; ci = i; } } diff --git a/benchplot/vendor/github.com/aclements/go-gg/gg/plot.go b/benchplot/vendor/github.com/aclements/go-gg/gg/plot.go index 148adc4..6642349 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/gg/plot.go +++ b/benchplot/vendor/github.com/aclements/go-gg/gg/plot.go @@ -153,8 +153,15 @@ func (p *Plot) getScales(aes string) scalerTree { return st } +func (p *Plot) copyScales(old, new table.GroupID) { + for _, st := range p.scales { + st.scales[new] = st.find(old) + } +} + // SetScale binds a scale to the given visual aesthetic. SetScale is -// shorthand for SetScaleAt(aes, s, table.RootGroupID). +// shorthand for SetScaleAt(aes, s, table.RootGroupID). SetScale must +// be called before Add. // // SetScale returns p for ease of chaining. func (p *Plot) SetScale(aes string, s Scaler) *Plot { @@ -162,7 +169,8 @@ func (p *Plot) SetScale(aes string, s Scaler) *Plot { } // SetScaleAt binds a scale to the given visual aesthetic for all data -// in group gid or descendants of gid. +// in group gid or descendants of gid. SetScaleAt must be called +// before Add. func (p *Plot) SetScaleAt(aes string, s Scaler, gid table.GroupID) *Plot { // TODO: Should aes be an enum so you can't mix up aesthetics // and column names? diff --git a/benchplot/vendor/github.com/aclements/go-gg/gg/render.go b/benchplot/vendor/github.com/aclements/go-gg/gg/render.go index d652734..78e2188 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/gg/render.go +++ b/benchplot/vendor/github.com/aclements/go-gg/gg/render.go @@ -5,6 +5,7 @@ package gg import ( + "bytes" "fmt" "io" "math" @@ -208,17 +209,21 @@ func (e *eltSubplot) render(r *eltRender) { x, y, w, h := e.Layout() m := e.plotMargins + // Round the bounds rectangle in. + x2i, y2i := int(x+w), int(y+h) + xi, yi := int(math.Ceil(x)), int(math.Ceil(y)) + wi, hi := x2i-xi, y2i-yi + // Create clip region for plot area. clipId, clipRef := r.genid("clip") svg.ClipPath(`id="` + clipId + `"`) - svg.Rect(int(x), int(y), int(w), int(h)) + svg.Rect(xi, yi, wi, hi) svg.ClipEnd() svg.Group(`clip-path="` + clipRef + `"`) - defer svg.Gend() // Set scale ranges. 
- xRanger := NewFloatRanger(x+m.l, x+w-m.r) - yRanger := NewFloatRanger(y+h-m.b, y+m.t) + xRanger := NewFloatRanger(float64(xi)+m.l, float64(x2i)-m.r) + yRanger := NewFloatRanger(float64(y2i)-m.b, float64(yi)+m.t) for s := range e.scales["x"] { s.Ranger(xRanger) } @@ -227,18 +232,18 @@ func (e *eltSubplot) render(r *eltRender) { } // Render grid. - renderBackground(svg, x, y, w, h) + renderBackground(svg, xi, yi, wi, hi) for s := range e.scales["x"] { - renderGrid(svg, 'x', s, e.xTicks.ticks[s], y, y+h) + renderGrid(svg, 'x', s, e.xTicks.ticks[s], yi, y2i) } for s := range e.scales["y"] { - renderGrid(svg, 'y', s, e.yTicks.ticks[s], x, x+w) + renderGrid(svg, 'y', s, e.yTicks.ticks[s], xi, x2i) } // Create rendering environment. env := &renderEnv{ cache: make(map[renderCacheKey]table.Slice), - area: [4]float64{x, y, w, h}, + area: [4]float64{float64(xi), float64(yi), float64(wi), float64(hi)}, } // Render marks. @@ -249,39 +254,32 @@ func (e *eltSubplot) render(r *eltRender) { } } - // Skip border and scale ticks. + // End clip region. + svg.Gend() + + // Draw border and scale ticks. // // TODO: Theme. - return // Render border. - rnd := func(x float64) float64 { - // Round to nearest N. - return math.Floor(x + 0.5) - } - svg.Path(fmt.Sprintf("M%g %gV%gH%g", rnd(x), rnd(y), rnd(y+h), rnd(x+w)), "stroke:#888; fill:none; stroke-width:2") // TODO: Theme. + svg.Path(fmt.Sprintf("M%d %dV%dH%d", xi, yi, y2i, x2i), "stroke:#888; fill:none; stroke-width:2") // TODO: Theme. // Render scale ticks. for s := range e.scales["x"] { - renderScale(svg, 'x', s, e.xTicks.ticks[s], y+h) + renderScale(svg, 'x', s, e.xTicks.ticks[s], y2i) } for s := range e.scales["y"] { - renderScale(svg, 'y', s, e.yTicks.ticks[s], x) + renderScale(svg, 'y', s, e.yTicks.ticks[s], xi) } } // TODO: Use shape-rendering: crispEdges? -func renderBackground(svg *svg.SVG, x, y, w, h float64) { - r := func(x float64) int { - // Round to nearest N. - return int(math.Floor(x + 0.5)) - } - - svg.Rect(r(x), r(y), r(x+w)-r(x), r(y+h)-r(y), "fill:#eee") // TODO: Theme. +func renderBackground(svg *svg.SVG, x, y, w, h int) { + svg.Rect(x, y, w, h, "fill:#eee") // TODO: Theme. } -func renderGrid(svg *svg.SVG, dir rune, scale Scaler, ticks plotEltTicks, start, end float64) { +func renderGrid(svg *svg.SVG, dir rune, scale Scaler, ticks plotEltTicks, start, end int) { major := mapMany(scale, ticks.major).([]float64) r := func(x float64) float64 { @@ -292,34 +290,50 @@ func renderGrid(svg *svg.SVG, dir rune, scale Scaler, ticks plotEltTicks, start, var path []string for _, p := range major { if dir == 'x' { - path = append(path, fmt.Sprintf("M%.6g %.6gv%.6g", r(p), r(start), r(end)-r(start))) + path = append(path, fmt.Sprintf("M%.6g %dv%d", r(p), start, end-start)) } else { - path = append(path, fmt.Sprintf("M%.6g %.6gh%.6g", r(start), r(p), r(end)-r(start))) + path = append(path, fmt.Sprintf("M%d %.6gh%d", start, r(p), end-start)) } } - svg.Path(strings.Join(path, ""), "stroke: #fff; stroke-width:2") // TODO: Theme. + svg.Path(wrapPath(strings.Join(path, "")), "stroke: #fff; stroke-width:2") // TODO: Theme. } -func renderScale(svg *svg.SVG, dir rune, scale Scaler, ticks plotEltTicks, pos float64) { +func renderScale(svg *svg.SVG, dir rune, scale Scaler, ticks plotEltTicks, pos int) { const length float64 = 4 // TODO: Theme - major := mapMany(scale, ticks.major).([]float64) - - r := func(x float64) float64 { - // Round to nearest N. 
- return math.Floor(x + 0.5) - } - var path []string - for _, p := range major { - if dir == 'x' { - path = append(path, fmt.Sprintf("M%.6g %.6gv%.6g", r(p), r(pos), -length)) - } else { - path = append(path, fmt.Sprintf("M%.6g %.6gh%.6g", r(pos), r(p), length)) + var path bytes.Buffer + have := map[float64]bool{} + for _, t := range []struct { + length float64 + s table.Slice + }{ + {length * 2, ticks.major}, + {length, ticks.minor}, + } { + ticks := mapMany(scale, t.s).([]float64) + + r := func(x float64) float64 { + // Round to nearest N. + return math.Floor(x + 0.5) + } + for _, p := range ticks { + p = r(p) + if have[p] { + // Avoid overplotting the same tick + // marks. + continue + } + have[p] = true + if dir == 'x' { + fmt.Fprintf(&path, "M%.6g %dv%.6g", p, pos, -t.length) + } else { + fmt.Fprintf(&path, "M%d %.6gh%.6g", pos, p, t.length) + } } - } - svg.Path(strings.Join(path, ""), "stroke:#888; stroke-width:2") // TODO: Theme + } + svg.Path(wrapPath(path.String()), "stroke:#888; stroke-width:2") // TODO: Theme } func (e *eltTicks) render(r *eltRender) { @@ -432,3 +446,41 @@ func (env *renderEnv) Size() (w, h float64) { func round(x float64) int { return int(math.Floor(x + 0.5)) } + +// wrapPath wraps path data p to avoid exceeding SVG's recommended +// line length limit of 255 characters. +func wrapPath(p string) string { + const width = 70 + if len(p) <= width { + return p + } + // Chop up p until we get below the width limit. + parts := make([]string, 0, 16) + for len(p) > width { + // Find the last command or space before exceeding width. + lastCmd, lastSpace := 0, 0 + for i, ch := range p { + if i >= width && (lastCmd != 0 || lastSpace != 0) { + break + } + if 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' { + lastCmd = i + } else if ch == ' ' { + lastSpace = i + } + } + split := len(p) + // Prefer splitting at commands, but take spaces in + // case it's a huge command. + if lastCmd != 0 { + split = lastCmd + } else if lastSpace != 0 { + split = lastSpace + } + parts, p = append(parts, p[:split]), p[split:] + } + if len(p) > 0 { + parts = append(parts, p) + } + return strings.Join(parts, "\n") +} diff --git a/benchplot/vendor/github.com/aclements/go-gg/gg/scale.go b/benchplot/vendor/github.com/aclements/go-gg/gg/scale.go index b7e56c8..dca263e 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/gg/scale.go +++ b/benchplot/vendor/github.com/aclements/go-gg/gg/scale.go @@ -9,6 +9,8 @@ import ( "image/color" "math" "reflect" + "strings" + "time" "github.com/aclements/go-gg/generic" "github.com/aclements/go-gg/generic/slice" @@ -103,16 +105,47 @@ type Scaler interface { // XXX If x is Unscaled, Map must only apply the ranger. Map(x interface{}) interface{} - // XXX What should this return? moremath returns values in the - // input space, but that obviously doesn't work for discrete - // scales if I want the ticks between values. It could return - // values in the intermediate space or the output space. + // Ticks returns a set of "nice" major and minor tick marks + // spanning this Scaler's domain. The returned tick locations + // are values in this Scaler's domain type in increasing + // order. labels[i] gives the label of the major tick at + // major[i]. The minor ticks are a superset of the major + // ticks. + // + // max and pred constrain the ticks returned by Ticks. If + // possible, Ticks returns the largest set of ticks such that + // there are no more than max major ticks and the ticks + // satisfy pred. 
Both are hints, since for some scale types + // there's no clear way to reduce the number of ticks. + // + // pred should return true if the given set of ticks is + // acceptable. pred must be "monotonic" in the following + // sense: if pred is true for a given set of ticks, it must be + // true for any subset of those ticks and if pred is false for + // a given set of ticks, it must be false for any superset of + // those ticks. In other words, pred should return false if + // there are "too many" ticks or they are "too close + // together". If pred is nil, it is assumed to always be + // satisfied. + // + // If no tick marks can be produced (for example, there are no + // values in this Scaler's domain or the predicate cannot be + // satisfied), Ticks returns nil, nil, nil. + // + // TODO: Should this return ticks in the input space, the + // intermediate space, or the output space? moremath returns + // values in the input space. Input space values doesn't work + // for discrete scales if I want the ticks between values. // Intermediate space works for continuous and discrete - // inputs, but not for discrete ranges (maybe that's okay). - // Output space is bad because I change the plot location in - // the course of layout. Currently it returns values in the - // input space or nil if ticks don't make sense. - Ticks(max int, pred func(major []float64, labels []string) bool) (major, minor table.Slice, labels []string) + // inputs, but not for discrete ranges (maybe that's okay) and + // it's awkward for a caller to do anything with an + // intermediate space value. Output space doesn't work with + // this API because I change the plot location in the course + // of layout without recomputing ticks. However, output space + // could work if Scaler exposed tick levels, since I could + // save the computed tick level across a re-layout and + // recompute the output space ticks from that. + Ticks(max int, pred func(major, minor table.Slice, labels []string) bool) (major, minor table.Slice, labels []string) // SetFormatter sets the formatter for values on this scale. // @@ -134,14 +167,29 @@ type ContinuousScaler interface { // TODO: There are two variations on min/max. 1) We can force // the min/max, even if there's data beyond it. 2) We can say // cap the scale to some min/max, but a smaller range is okay. + // Currently we can't express 2. - SetMin(v float64) ContinuousScaler - SetMax(v float64) ContinuousScaler - - // TODO: Should Include take an interface{} and work on any - // Scalar? - - Include(v float64) ContinuousScaler + // SetMin and SetMax set the minimum and maximum values of + // this Scalar's domain and return the Scalar. If v is nil, it + // unsets the bound. + // + // v must be convertible to the Scaler's domain type. For + // example, if this is a linear scale, v can be of any + // numerical type. Unlike ExpandDomain, these do not set the + // Scaler's domain type. + SetMin(v interface{}) ContinuousScaler + SetMax(v interface{}) ContinuousScaler + + // TODO: Should Include work on any Scaler? + + // Include requires that v be included in this Scaler's + // domain. Like SetMin/SetMax, this can expand Scaler's + // domain, but unlike SetMin/SetMax, this does not restrict + // it. If v is nil, it does nothing. + // + // v must be convertible to the Scaler's domain type. Unlike + // ExpandDomain, this does not set the Scaler's domain type. 
+ Include(v interface{}) ContinuousScaler } // Unscaled represents a value that should not be scaled, but instead @@ -252,7 +300,7 @@ func (s *defaultScale) Map(x interface{}) interface{} { return s.ensure().Map(x) } -func (s *defaultScale) Ticks(max int, pred func(major []float64, labels []string) bool) (major, minor table.Slice, labels []string) { +func (s *defaultScale) Ticks(max int, pred func(major, minor table.Slice, labels []string) bool) (major, minor table.Slice, labels []string) { return s.ensure().Ticks(max, pred) } @@ -279,6 +327,9 @@ func DefaultScale(seq table.Slice) (Scaler, error) { case []string: // TODO: Ordinal scale + + case []time.Time: + return NewTimeScaler(), nil } rt := reflect.TypeOf(seq).Elem() @@ -353,7 +404,7 @@ func (s *identityScale) RangeType() reflect.Type { func (s *identityScale) Ranger(r Ranger) Ranger { return nil } func (s *identityScale) Map(x interface{}) interface{} { return x } -func (s *identityScale) Ticks(max int, pred func(major []float64, labels []string) bool) (major, minor table.Slice, labels []string) { +func (s *identityScale) Ticks(max int, pred func(major, minor table.Slice, labels []string) bool) (major, minor table.Slice, labels []string) { return nil, nil, nil } @@ -372,33 +423,48 @@ func (s *identityScale) CloneScaler() Scaler { // Maybe a sub-interface for continuous Scalers? func NewLinearScaler() ContinuousScaler { // TODO: Control over base. - return &linearScale{ - s: scale.Linear{Min: math.NaN(), Max: math.NaN()}, + return &moremathScale{ + min: math.NaN(), + max: math.NaN(), + dataMin: math.NaN(), + dataMax: math.NaN(), + } +} + +func NewLogScaler(base int) ContinuousScaler { + return &moremathScale{ + min: math.NaN(), + max: math.NaN(), + base: base, dataMin: math.NaN(), dataMax: math.NaN(), } } -type linearScale struct { - s scale.Linear +type moremathScale struct { r Ranger f interface{} domainType reflect.Type + base int + min, max float64 dataMin, dataMax float64 } -func (s *linearScale) String() string { - return fmt.Sprintf("linear [%g,%g] => %s", s.s.Min, s.s.Max, s.r) +func (s *moremathScale) String() string { + if s.base > 0 { + return fmt.Sprintf("log [%d,%g,%g] => %s", s.base, s.min, s.max, s.r) + } + return fmt.Sprintf("linear [%g,%g] => %s", s.min, s.max, s.r) } -func (s *linearScale) ExpandDomain(v table.Slice) { +func (s *moremathScale) ExpandDomain(vs table.Slice) { if s.domainType == nil { - s.domainType = reflect.TypeOf(v).Elem() + s.domainType = reflect.TypeOf(vs).Elem() } var data []float64 - slice.Convert(&data, v) + slice.Convert(&data, vs) min, max := s.dataMin, s.dataMax for _, v := range data { if math.IsNaN(v) || math.IsInf(v, 0) { @@ -414,48 +480,77 @@ func (s *linearScale) ExpandDomain(v table.Slice) { s.dataMin, s.dataMax = min, max } -func (s *linearScale) SetMin(v float64) ContinuousScaler { - s.s.Min = v +func (s *moremathScale) SetMin(v interface{}) ContinuousScaler { + if v == nil { + s.min = math.NaN() + return s + } + vfloat := reflect.ValueOf(v).Convert(float64Type).Float() + s.min = vfloat return s } -func (s *linearScale) SetMax(v float64) ContinuousScaler { - s.s.Max = v +func (s *moremathScale) SetMax(v interface{}) ContinuousScaler { + if v == nil { + s.max = math.NaN() + return s + } + vfloat := reflect.ValueOf(v).Convert(float64Type).Float() + s.max = vfloat return s } -func (s *linearScale) Include(v float64) ContinuousScaler { - if math.IsNaN(v) || math.IsInf(v, 0) { +func (s *moremathScale) Include(v interface{}) ContinuousScaler { + if v == nil { + return s + } + vfloat := 
reflect.ValueOf(v).Convert(float64Type).Float() + if math.IsNaN(vfloat) || math.IsInf(vfloat, 0) { return s } if math.IsNaN(s.dataMin) { - s.dataMin, s.dataMax = v, v + s.dataMin, s.dataMax = vfloat, vfloat } else { - s.dataMin = math.Min(s.dataMin, v) - s.dataMax = math.Max(s.dataMax, v) + s.dataMin = math.Min(s.dataMin, vfloat) + s.dataMax = math.Max(s.dataMax, vfloat) } return s } -func (s *linearScale) get() scale.Linear { - ls := s.s - if ls.Min > ls.Max { - ls.Min, ls.Max = ls.Max, ls.Min +type tickMapper interface { + scale.Ticker + Map(float64) float64 +} + +func (s *moremathScale) get() tickMapper { + min, max := s.min, s.max + if min > max { + min, max = max, min } - if math.IsNaN(ls.Min) { - ls.Min = s.dataMin + if math.IsNaN(min) { + min = s.dataMin } - if math.IsNaN(ls.Max) { - ls.Max = s.dataMax + if math.IsNaN(max) { + max = s.dataMax } - if math.IsNaN(ls.Min) { + if math.IsNaN(min) { // Only possible if both dataMin and dataMax are NaN. - ls.Min, ls.Max = -1, 1 + min, max = -1, 1 + } + if s.base > 0 { + ls, err := scale.NewLog(min, max, s.base) + if err != nil { + panic(err) + } + ls.SetClamp(true) + return &ls + } + return &scale.Linear{ + Min: min, Max: max, } - return ls } -func (s *linearScale) Ranger(r Ranger) Ranger { +func (s *moremathScale) Ranger(r Ranger) Ranger { old := s.r if r != nil { s.r = r @@ -463,11 +558,11 @@ func (s *linearScale) Ranger(r Ranger) Ranger { return old } -func (s *linearScale) RangeType() reflect.Type { +func (s *moremathScale) RangeType() reflect.Type { return s.r.RangeType() } -func (s *linearScale) Map(x interface{}) interface{} { +func (s *moremathScale) Map(x interface{}) interface{} { ls := s.get() var scaled float64 switch x := x.(type) { @@ -500,10 +595,16 @@ func (s *linearScale) Map(x interface{}) interface{} { } } -func (s *linearScale) Ticks(max int, pred func(major []float64, labels []string) bool) (major, minor table.Slice, labels []string) { +func (s *moremathScale) Ticks(max int, pred func(major, minor table.Slice, labels []string) bool) (major, minor table.Slice, labels []string) { type Stringer interface { String() string } + if s.domainType == nil { + // There are no values and no domain type, so we can't + // compute ticks or return slices of the domain type. + return nil, nil, nil + } + o := scale.TickOptions{Max: max} // If the domain type is integral, don't let the tick level go @@ -515,6 +616,14 @@ func (s *linearScale) Ticks(max int, pred func(major []float64, labels []string) reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: o.MinLevel, o.MaxLevel = 0, 1000 + default: + // Set bounds for the pred loop below. + o.MinLevel, o.MaxLevel = -1000, 1000 + } + ls := s.get() + level, ok := o.FindLevel(ls, 0) + if !ok { + return nil, nil, nil } mkLabels := func(major []float64) []string { @@ -558,22 +667,339 @@ func (s *linearScale) Ticks(max int, pred func(major []float64, labels []string) } return labels } - if pred != nil { - o.Pred = func(ticks []float64, level int) bool { - return pred(ticks, mkLabels(ticks)) + // Adjust level to satisfy pred. + for ; level <= o.MaxLevel; level++ { + majorx := ls.TicksAtLevel(level) + minorx := ls.TicksAtLevel(level - 1) + labels := mkLabels(majorx.([]float64)) + + // Convert to domain type. 
+ majorv := reflect.New(reflect.SliceOf(s.domainType)) + minorv := reflect.New(reflect.SliceOf(s.domainType)) + slice.Convert(majorv.Interface(), majorx) + slice.Convert(minorv.Interface(), minorx) + major, minor = majorv.Elem().Interface(), minorv.Elem().Interface() + + if pred == nil || pred(major, minor, labels) { + return major, minor, labels } } - - ls := s.get() - majorx, minorx := ls.Ticks(o) - return majorx, minorx, mkLabels(majorx) + Warning.Printf("%s: unable to compute satisfactory ticks, axis will be empty", s) + return nil, nil, nil } -func (s *linearScale) SetFormatter(f interface{}) { +func (s *moremathScale) SetFormatter(f interface{}) { s.f = f } -func (s *linearScale) CloneScaler() Scaler { +func (s *moremathScale) CloneScaler() Scaler { + s2 := *s + return &s2 +} + +// NewTimeScaler returns a continuous linear scale. The domain must +// be time.Time. +func NewTimeScaler() *timeScale { + return &timeScale{} +} + +type timeScale struct { + r Ranger + f func(time.Time) string + min, max time.Time + dataMin, dataMax time.Time +} + +func (s *timeScale) String() string { + return fmt.Sprintf("time [%g,%g] => %s", s.min, s.max, s.r) +} + +func (s *timeScale) ExpandDomain(vs table.Slice) { + var data []time.Time + slice.Convert(&data, vs) + min, max := s.dataMin, s.dataMax + for _, v := range data { + if v.Before(min) || min.IsZero() { + min = v + } + if v.After(max) || max.IsZero() { + max = v + } + } + s.dataMin, s.dataMax = min, max +} + +func (s *timeScale) SetMin(v interface{}) ContinuousScaler { + s.min = v.(time.Time) + return s +} + +func (s *timeScale) SetMax(v interface{}) ContinuousScaler { + s.max = v.(time.Time) + return s +} + +func (s *timeScale) Include(v interface{}) ContinuousScaler { + tv := v.(time.Time) + if s.dataMin.IsZero() { + s.dataMin, s.dataMax = tv, tv + } else { + if tv.Before(s.dataMin) { + s.dataMin = tv + } + if tv.After(s.dataMax) { + s.dataMax = tv + } + } + return s +} + +func (s *timeScale) Ranger(r Ranger) Ranger { + old := s.r + if r != nil { + s.r = r + } + return old +} + +func (s *timeScale) RangeType() reflect.Type { + return s.r.RangeType() +} + +func (s *timeScale) getMinMax() (time.Time, time.Time) { + min := s.min + if min.IsZero() { + min = s.dataMin + } + max := s.max + if max.IsZero() { + max = s.dataMax + } + return min, max +} + +func (s *timeScale) Map(x interface{}) interface{} { + min, max := s.getMinMax() + t := x.(time.Time) + var scaled float64 = float64(t.Sub(min)) / float64(max.Sub(min)) + + switch r := s.r.(type) { + case ContinuousRanger: + return r.Map(scaled) + + case DiscreteRanger: + _, levels := r.Levels() + // Bin the scaled value into 'levels' bins. + level := int(scaled * float64(levels)) + if level < 0 { + level = 0 + } else if level >= levels { + level = levels - 1 + } + return r.MapLevel(level, levels) + + default: + panic("Ranger must be a ContinuousRanger or DiscreteRanger") + } +} + +type durationTicks time.Duration + +func (d durationTicks) Next(t time.Time) time.Time { + if d == 0 { + panic("invalid zero duration") + } + return t.Add(time.Duration(d)).Truncate(time.Duration(d)) +} + +var timeTickerLevels = []struct { + min time.Duration + next func(t time.Time) time.Time +}{ + {time.Minute, durationTicks(time.Minute).Next}, + {10 * time.Minute, durationTicks(10 * time.Minute).Next}, + {time.Hour, func(t time.Time) time.Time { + year, month, day := t.Date() + // N.B. This will skip an hour at some DST transitions. 
+ return time.Date(year, month, day, t.Hour()+1, 0, 0, 0, t.Location()) + }}, + {6 * time.Hour, func(t time.Time) time.Time { + year, month, day := t.Date() + // N.B. This will skip an hour if the DST transition + // happens at a multiple of 6 hours. + return time.Date(year, month, day, ((t.Hour()+6)/6)*6, 0, 0, 0, t.Location()) + }}, + {24 * time.Hour, func(t time.Time) time.Time { + year, month, day := t.Date() + return time.Date(year, month, day+1, 0, 0, 0, 0, t.Location()) + }}, + {7 * 24 * time.Hour, func(t time.Time) time.Time { + year, month, day := t.Date() + loc := t.Location() + _, week1 := t.ISOWeek() + for { + day++ + t = time.Date(year, month, day, 0, 0, 0, 0, loc) + if _, week2 := t.ISOWeek(); week1 != week2 { + return t + } + } + }}, + {30 * 24 * time.Hour, func(t time.Time) time.Time { + year, month, _ := t.Date() + return time.Date(year, month+1, 1, 0, 0, 0, 0, t.Location()) + }}, + {365 * 24 * time.Hour, func(t time.Time) time.Time { + return time.Date(t.Year()+1, time.January, 1, 0, 0, 0, 0, t.Location()) + }}, +} + +// timeTicker calculates the ticks between min and max. levels >= 0 +// refer to entries in timeTickerLevels. levels < 0 start with -1 at +// every 10 seconds and then alternate dividing by 2 and 5. So level +// -3 is 1s, -9 is 1ms, -12 is 1us, etc. +// https://play.golang.org/p/xUv4P25Wxi will print the level step +// sizes. +type timeTicker struct { + min, max time.Time +} + +func (t *timeTicker) getNextTick(level int) func(time.Time) time.Time { + if level >= 0 { + if level >= len(timeTickerLevels) { + // TODO: larger ticks should do multiples of + // the year, like the linear scale does. + panic(fmt.Sprintf("invalid level %d", level)) + } + return timeTickerLevels[level].next + } else { + exp, double := level/2+1, (level%2 == 0) + step := math.Pow10(exp) * 1e9 + if double { + step = step * 5 + } + return durationTicks(time.Duration(step)).Next + } +} + +func (t *timeTicker) CountTicks(level int) int { + next := t.getNextTick(level) + var i int + // N.B. We cut off at 1e5 ticks. If your plot is larger than + // that, you're on your own. + for x := next(t.min.Add(-1)); !x.After(t.max) && i < 1e5; x = next(x) { + i++ + } + return i +} + +func (t *timeTicker) TicksAtLevel(level int) interface{} { + var ticks []time.Time + next := t.getNextTick(level) + for x := next(t.min.Add(-1)); !x.After(t.max); x = next(x) { + ticks = append(ticks, x) + } + return ticks +} + +func (t *timeTicker) GuessLevel() int { + dur := t.max.Sub(t.min) + for i := len(timeTickerLevels) - 1; i >= 0; i-- { + if dur > timeTickerLevels[i].min { + return i + } + } + return int(2 * (math.Log10(float64(dur)/1e9) - 2)) +} + +func (timeTicker) MaxLevel() int { + return len(timeTickerLevels) - 1 +} + +func (timeTicker) Label(cur, prev time.Time, level int) string { + dateFmt := "2006" + switch { + case level < 6: + dateFmt = "2006/1/2" + if !prev.IsZero() { + if prev.Year() == cur.Year() { + dateFmt = "Jan 2" + _, prevweek := prev.ISOWeek() + _, curweek := cur.ISOWeek() + if prevweek == curweek { + dateFmt = "Mon" + if prev.YearDay() == cur.YearDay() { + dateFmt = "" + } + } + } + } + case level < 7: + dateFmt = "2006/1" + if !prev.IsZero() && prev.Year() == cur.Year() { + dateFmt = "Jan" + } + } + timeFmt := "" + switch { + case level < -3: // < 1s + digits := (-level - 2) / 2 + timeFmt = "15:04:05." 
+ strings.Repeat("0", digits) + case level < 0: // < 1m + timeFmt = "15:04:05" + case level < 4: // < 1d + timeFmt = "15:04" + } + return cur.Format(strings.TrimSpace(dateFmt + " " + timeFmt)) +} + +func (s *timeScale) Ticks(maxTicks int, pred func(major, minor table.Slice, labels []string) bool) (table.Slice, table.Slice, []string) { + min, max := s.getMinMax() + ticker := &timeTicker{min, max} + o := scale.TickOptions{Max: maxTicks, MinLevel: -21, MaxLevel: ticker.MaxLevel()} + level, ok := o.FindLevel(ticker, ticker.GuessLevel()) + if !ok { + // TODO(quentin): Better handling of too-large time range. + return nil, nil, nil + } + mkLabels := func(major []time.Time) []string { + // TODO(quentin): Pick a format based on which parts + // of the time have changed and are non-zero. + labels := make([]string, len(major)) + if s.f != nil { + // Use custom formatter. + for i, x := range major { + labels[i] = s.f(x) + } + return labels + } + var prev time.Time + for i, t := range major { + labels[i] = ticker.Label(t, prev, level) + prev = t + } + return labels + } + var majors, minors []time.Time + var labels []string + for ; level <= o.MaxLevel; level++ { + majors = ticker.TicksAtLevel(level).([]time.Time) + if level > o.MinLevel { + minors = ticker.TicksAtLevel(level - 1).([]time.Time) + } + labels = mkLabels(majors) + if pred == nil || pred(majors, minors, labels) { + break + } + } + return majors, minors, labels +} + +func (s *timeScale) SetFormatter(f interface{}) { + s.f = f.(func(time.Time) string) +} + +func (s *timeScale) CloneScaler() Scaler { s2 := *s return &s2 } @@ -667,7 +1093,7 @@ func (s *ordinalScale) Map(x interface{}) interface{} { } } -func (s *ordinalScale) Ticks(max int, pred func(major []float64, labels []string) bool) (major, minor table.Slice, labels []string) { +func (s *ordinalScale) Ticks(max int, pred func(major, minor table.Slice, labels []string) bool) (major, minor table.Slice, labels []string) { // TODO: Return *no* ticks and only labels. Can't currently // express this. @@ -842,6 +1268,9 @@ func (r *defaultColorRanger) MapLevel(i, j int) interface{} { func mapMany(scaler Scaler, seq table.Slice) table.Slice { sv := reflect.ValueOf(seq) rt := reflect.SliceOf(scaler.RangeType()) + if seq == nil { + return reflect.MakeSlice(rt, 0, 0).Interface() + } res := reflect.MakeSlice(rt, sv.Len(), sv.Len()) for i, len := 0, sv.Len(); i < len; i++ { val := scaler.Map(sv.Index(i).Interface()) diff --git a/benchplot/vendor/github.com/aclements/go-gg/gg/testmain.go b/benchplot/vendor/github.com/aclements/go-gg/gg/testmain.go index b078fff..c37b033 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/gg/testmain.go +++ b/benchplot/vendor/github.com/aclements/go-gg/gg/testmain.go @@ -38,7 +38,7 @@ func main() { xs := vec.Concat(xs1, xs2) ys := vec.Concat(ys1, ys2) - tab := new(table.Table).Add("x", xs).Add("y", ys).Add("which", which) + tab := table.NewBuilder(nil).Add("x", xs).Add("y", ys).Add("which", which).Done() plot := gg.NewPlot(tab) plot.GroupAuto() diff --git a/benchplot/vendor/github.com/aclements/go-gg/gg/text.go b/benchplot/vendor/github.com/aclements/go-gg/gg/text.go index 5751bf2..1869aaf 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/gg/text.go +++ b/benchplot/vendor/github.com/aclements/go-gg/gg/text.go @@ -21,7 +21,7 @@ func measureString(pxSize float64, s string) textMetrics { // Chrome's default font-size is 16px, so 20px is a reasonable // leading. 
return textMetrics{ - width: 0.75 * pxSize * float64(len(s)), + width: 0.5 * pxSize * float64(len(s)), leading: 1.25 * pxSize, } } diff --git a/benchplot/vendor/github.com/aclements/go-gg/ggstat/agg.go b/benchplot/vendor/github.com/aclements/go-gg/ggstat/agg.go index cf25810..e900266 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/ggstat/agg.go +++ b/benchplot/vendor/github.com/aclements/go-gg/ggstat/agg.go @@ -128,6 +128,9 @@ func checkConst(t *table.Table, col string) bool { if v.Len() <= 1 { return true } + if !v.Type().Comparable() { + return false + } elem := v.Index(0).Interface() for i, l := 1, v.Len(); i < l; i++ { if elem != v.Index(i).Interface() { diff --git a/benchplot/vendor/github.com/aclements/go-gg/ggstat/bin.go b/benchplot/vendor/github.com/aclements/go-gg/ggstat/bin.go index 7b457ac..2602004 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/ggstat/bin.go +++ b/benchplot/vendor/github.com/aclements/go-gg/ggstat/bin.go @@ -4,14 +4,16 @@ package ggstat -import "github.com/aclements/go-gg/table" +import ( + "math" + "reflect" + "sort" -// XXX Maybe these should all be structs that satisfy the same basic -// interface{F(table.Grouping) table.Grouping}. Then optional -// arguments are easy and gg.Plot could have a Stat method that -// applies a ggstat (what would it do with the bindings?). E.g., it -// would be nice if you could just say -// plot.Stat(ggstat.ECDF{}).Add(gglayer.Steps{}). + "github.com/aclements/go-gg/generic" + "github.com/aclements/go-gg/generic/slice" + "github.com/aclements/go-gg/table" + "github.com/aclements/go-moremath/vec" +) // XXX If this is just based on the number of bins, it can come up // with really ugly boundary numbers. If the bin width is specified, @@ -28,8 +30,167 @@ import "github.com/aclements/go-gg/table" // has lots more). // // XXX Scale transform. -func Bin(g table.Grouping, xcol, wcol string) table.Grouping { - return nil +// +// The result of Bin has two columns in addition to constant columns from the input: +// +// - Column X is the left edge of the bin. +// +// - Column W is the sum of the rows' weights, or column "count" is +// the number of rows in the bin. +type Bin struct { + // X is the name of the column to use for samples. + X string + + // W is the optional name of the column to use for sample + // weights. It may be "" to weight each sample as 1. + W string + + // Width controls how wide each bin should be. If not provided + // or 0, a width will be chosen to produce 30 bins. If X is an + // integer column, this width will be treated as an integer as + // well. + Width float64 + + // Center controls the center point of each bin. To center on + // integers, for example, you could use {Width: 1, Center: + // 0}. + // XXX What does center mean for integers? Should an unspecified center yield an autochosen one, or 0? + //Center float64 + + // Breaks is the set of break points to use as boundaries + // between bins. The interval of each bin is [Breaks[i], + // Breaks[i+1]). Data points before the first break are + // dropped. If provided, Width and Center are ignored. + Breaks table.Slice + + // SplitGroups indicates that each group in the table should + // have separate bounds based on the data in that group alone. + // The default, false, indicates that the binning function + // should use the bounds of all of the data combined. This + // makes it easier to compare bins across groups. 
+ SplitGroups bool +} + +func (b Bin) F(g table.Grouping) table.Grouping { + breaks := reflect.ValueOf(b.Breaks) + agg := AggCount("count") + if b.W != "" { + agg = aggFn(vec.Sum, "", b.W) + } + if !breaks.IsValid() && !b.SplitGroups { + breaks = b.computeBreaks(g) + } + // Change b.X to the start of the bin. + g = table.MapTables(g, func(_ table.GroupID, t *table.Table) *table.Table { + breaks := breaks + if !breaks.IsValid() { + breaks = b.computeBreaks(t) + } + nbreaks := breaks.Len() + + in := reflect.ValueOf(t.MustColumn(b.X)) + nin := in.Len() + + out := reflect.MakeSlice(breaks.Type(), nin, nin) + var found []int + for i := 0; i < nin; i++ { + elt := in.Index(i) + bin := sort.Search(nbreaks, func(j int) bool { + return generic.OrderR(elt, breaks.Index(j)) < 0 + }) + // 0 means the row doesn't fit on the front + // XXX Allow configuring the first and last bin as infinite or not. + bin = bin - 1 + if bin >= 0 { + found = append(found, i) + out.Index(i).Set(breaks.Index(bin)) + } + } + var nt table.Builder + for _, col := range t.Columns() { + if col == b.X { + nt.Add(col, slice.Select(out.Interface(), found)) + } else if c, ok := t.Const(col); ok { + nt.AddConst(col, c) + } else { + nt.Add(col, slice.Select(t.Column(col), found)) + } + } + return nt.Done() + }) + // Group by the found bin + return Agg(b.X)(agg).F(g) +} + +func (b Bin) computeBreaks(g table.Grouping) reflect.Value { + var cols []slice.T + for _, gid := range g.Tables() { + cols = append(cols, g.Table(gid).MustColumn(b.X)) + } + data := slice.Concat(cols...) + + min := slice.Min(data) + max := slice.Max(data) + + rv := reflect.ValueOf(min) + switch rv.Type().Kind() { + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + min, max := rv.Int(), reflect.ValueOf(max).Int() + width := int64(b.Width) + if width == 0 { + width = (max - min) / 30 + if width < 1 { + width = 1 + } + } + // XXX: This assumes boundaries should be aligned with + // 0. We should support explicit Center or Boundary + // requests. + min -= (min % width) + var breaks []int64 + for i := min; i < max; i += width { + breaks = append(breaks, i) + } + outs := reflect.New(reflect.ValueOf(cols[0]).Type()) + slice.Convert(outs.Interface(), breaks) + return outs.Elem() + case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr: + min, max := rv.Uint(), reflect.ValueOf(max).Uint() + width := uint64(b.Width) + if width == 0 { + width = (max - min) / 30 + if width < 1 { + width = 1 + } + } + min -= (min % width) + var breaks []uint64 + for i := min; i < max; i += width { + breaks = append(breaks, i) + } + outs := reflect.New(reflect.ValueOf(cols[0]).Type()) + slice.Convert(outs.Interface(), breaks) + return outs.Elem() + case reflect.Float32, reflect.Float64: + min, max := rv.Float(), reflect.ValueOf(max).Float() + width := b.Width + if width == 0 { + width = (max - min) / 30 + if width == 0 { + width = 1 + } + } + min -= math.Mod(min, width) + var breaks []float64 + for i := min; i < max; i += width { + breaks = append(breaks, i) + } + outs := reflect.New(reflect.ValueOf(cols[0]).Type()) + slice.Convert(outs.Interface(), breaks) + return outs.Elem() + default: + panic("can't compute breaks for unknown type") + } } // TODO: Count for categorical data. 
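[Editor's note] The new ggstat.Bin stat above replaces the old stub Bin function with a histogram-style binning transform. Below is a minimal usage sketch, not part of this patch: the "latency" column and the bucket width are invented for illustration, and it assumes a *table.Table satisfies table.Grouping as it does elsewhere in go-gg (e.g. in testmain.go above).

package main

import (
	"fmt"

	"github.com/aclements/go-gg/ggstat"
	"github.com/aclements/go-gg/table"
)

func main() {
	// Build a one-column table of hypothetical latency samples.
	tb := table.NewBuilder(nil)
	tb.Add("latency", []float64{0.1, 0.2, 0.7, 0.8, 1.3, 1.4, 1.9})

	// Bin "latency" into buckets 0.5 wide. Per the Bin docs above, the
	// result keeps "latency" as the left edge of each bucket and adds a
	// "count" column with the number of rows that fell into that bucket.
	binned := ggstat.Bin{X: "latency", Width: 0.5}.F(tb.Done())
	for _, gid := range binned.Tables() {
		t := binned.Table(gid)
		fmt.Println(t.Column("latency"), t.Column("count"))
	}
}

The final Agg(b.X) step in Bin.F is what collapses the rows, so the output has one row per distinct bucket edge within each input group.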
diff --git a/benchplot/vendor/github.com/aclements/go-gg/ggstat/density.go b/benchplot/vendor/github.com/aclements/go-gg/ggstat/density.go index d108588..73b8a60 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/ggstat/density.go +++ b/benchplot/vendor/github.com/aclements/go-gg/ggstat/density.go @@ -5,8 +5,6 @@ package ggstat import ( - "math" - "github.com/aclements/go-gg/generic/slice" "github.com/aclements/go-gg/table" "github.com/aclements/go-moremath/stats" @@ -42,25 +40,15 @@ type Density struct { // // TODO: This is particularly sensitive to the scale // transform. + // + // TODO: Base the default on the bandwidth. If the bandwidth + // is really narrow, we may need a lot of samples to exceed + // the Nyquist rate. N int - // Widen controls the domain of the returned density estimate. - // If Widen is < 0, the domain is the range of the data. - // Otherwise, the domain will be expanded by Widen*Bandwidth - // (which may be the computed bandwidth). If Widen is 0, it is - // replaced with a default value of 3. - // - // TODO: This is different from ECDF and LeastSquares. Make - // them consistent. - Widen float64 - - // SplitGroups indicates that each group in the table should - // have separate bounds based on the data in that group alone. - // The default, false, indicates that the density function - // should use the bounds of all of the data combined. This - // makes it possible to stack KDEs and easier to compare KDEs - // across groups. - SplitGroups bool + // Domain specifies the domain at which to sample this function. + // If Domain is nil, it defaults to DomainData{}. + Domain FunctionDomainer // Kernel is the kernel to use for the KDE. Kernel stats.KDEKernel @@ -96,72 +84,40 @@ func (d Density) F(g table.Grouping) table.Grouping { BoundaryMin: d.BoundaryMin, BoundaryMax: d.BoundaryMax, } - if d.N == 0 { - d.N = 200 - } - if d.Widen == 0 { - d.Widen = 3 - } dname, cname := "probability density", "cumulative density" - // Gather samples. - samples := map[table.GroupID]stats.Sample{} - for _, gid := range g.Tables() { - t := g.Table(gid) - var sample stats.Sample - slice.Convert(&sample.Xs, t.MustColumn(d.X)) - if d.W != "" { - slice.Convert(&sample.Weights, t.MustColumn(d.W)) - } - samples[gid] = sample + addEmpty := func(out *table.Builder) { + out.Add(dname, []float64{}) + out.Add(cname, []float64{}) } - min, max := math.NaN(), math.NaN() - if !d.SplitGroups { - // Compute combined bounds. - for _, sample := range samples { - smin, smax := sample.Bounds() - if math.IsNaN(smin) { - continue + return Function{ + X: d.X, N: d.N, Domain: d.Domain, + Fn: func(gid table.GroupID, in *table.Table, sampleAt []float64, out *table.Builder) { + if len(sampleAt) == 0 { + addEmpty(out) + return } - bandwidth := d.Bandwidth - if d.Bandwidth == 0 { - bandwidth = stats.BandwidthScott(sample) + // Get input sample. + var sample stats.Sample + slice.Convert(&sample.Xs, in.MustColumn(d.X)) + if d.W != "" { + slice.Convert(&sample.Weights, in.MustColumn(d.W)) + if sample.Weight() == 0 { + addEmpty(out) + return + } } - smin, smax = smin-d.Widen*bandwidth, smax+d.Widen*bandwidth - if smin < min || math.IsNaN(min) { - min = smin - } - if smax > max || math.IsNaN(max) { - max = smax + // Compute KDE. 
+ kde.Sample = sample + if d.Bandwidth == 0 { + kde.Bandwidth = stats.BandwidthScott(sample) } - } - } - return table.MapTables(g, func(gid table.GroupID, t *table.Table) *table.Table { - kde.Sample = samples[gid] - - if kde.Sample.Weight() == 0 { - return new(table.Builder).Add(d.X, []float64{}).Add(dname, []float64{}).Add(cname, []float64{}).Done() - } - - if d.Bandwidth == 0 { - kde.Bandwidth = stats.BandwidthScott(kde.Sample) - } - - if d.SplitGroups { - // Compute group bounds. - min, max = kde.Sample.Bounds() - min, max = min-d.Widen*kde.Bandwidth, max+d.Widen*kde.Bandwidth - } - - ss := vec.Linspace(min, max, d.N) - nt := new(table.Builder).Add(d.X, ss) - nt.Add(dname, vec.Map(kde.PDF, ss)) - nt.Add(cname, vec.Map(kde.CDF, ss)) - preserveConsts(nt, t) - return nt.Done() - }) + out.Add(dname, vec.Map(kde.PDF, sampleAt)) + out.Add(cname, vec.Map(kde.CDF, sampleAt)) + }, + }.F(g) } diff --git a/benchplot/vendor/github.com/aclements/go-gg/ggstat/domain.go b/benchplot/vendor/github.com/aclements/go-gg/ggstat/domain.go new file mode 100644 index 0000000..77bb78e --- /dev/null +++ b/benchplot/vendor/github.com/aclements/go-gg/ggstat/domain.go @@ -0,0 +1,109 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package ggstat + +import ( + "math" + + "github.com/aclements/go-gg/generic/slice" + "github.com/aclements/go-gg/table" + "github.com/aclements/go-moremath/stats" +) + +// A FunctionDomainer computes the domain over which to evaluate a +// statistical function. +type FunctionDomainer interface { + // FunctionDomain computes the domain of a particular column + // within a table. It takes a Grouping and a column in that + // Grouping to compute the domain of and returns a function + // that computes the domain for a specific group in the + // Grouping. This makes it possible for FunctionDomain to + // easily compute either Grouping-wide domains, or per-Table + // domains. + // + // The returned domain may be (NaN, NaN) to indicate that + // there is no data and the domain is vacuous. + FunctionDomain(g table.Grouping, col string) func(gid table.GroupID) (min, max float64) +} + +// DomainFixed is a FunctionDomainer that returns a fixed domain. +type DomainFixed struct { + Min, Max float64 +} + +var _ FunctionDomainer = DomainFixed{} + +func (r DomainFixed) FunctionDomain(g table.Grouping, col string) func(gid table.GroupID) (min, max float64) { + return func(table.GroupID) (min, max float64) { + return r.Min, r.Max + } +} + +// DomainData is a FunctionDomainer that computes domains based on the +// bounds of the data. +type DomainData struct { + // Widen expands the domain by Widen times the span of the + // data. + // + // A value of 1.0 means to use exactly the bounds of the data. + // If Widen is 0, it is treated as 1.1 (that is, widen the + // domain by 10%, or 5% on the left and 5% on the right). + Widen float64 + + // SplitGroups indicates that each group in the table should + // have a separate domain based on the data in that group + // alone. The default, false, indicates that the domain should + // be based on all of the data in the table combined. This + // makes it possible to stack functions and easier to compare + // them across groups. 
+ SplitGroups bool +} + +var _ FunctionDomainer = DomainData{} + +const defaultWiden = 1.1 + +func (r DomainData) FunctionDomain(g table.Grouping, col string) func(gid table.GroupID) (min, max float64) { + widen := r.Widen + if widen <= 0 { + widen = defaultWiden + } + + var xs []float64 + if !r.SplitGroups { + // Compute combined bounds. + gmin, gmax := math.NaN(), math.NaN() + for _, gid := range g.Tables() { + t := g.Table(gid) + slice.Convert(&xs, t.MustColumn(col)) + xmin, xmax := stats.Bounds(xs) + if xmin < gmin || math.IsNaN(gmin) { + gmin = xmin + } + if xmax > gmax || math.IsNaN(gmax) { + gmax = xmax + } + } + + // Widen bounds. + span := gmax - gmin + gmin, gmax = gmin-span*(widen-1)/2, gmax+span*(widen-1)/2 + + return func(table.GroupID) (min, max float64) { + return gmin, gmax + } + } + + return func(gid table.GroupID) (min, max float64) { + // Compute bounds. + slice.Convert(&xs, g.Table(gid).MustColumn(col)) + min, max = stats.Bounds(xs) + + // Widen bounds. + span := max - min + min, max = min-span*(widen-1)/2, max+span*(widen-1)/2 + return + } +} diff --git a/benchplot/vendor/github.com/aclements/go-gg/ggstat/ecdf.go b/benchplot/vendor/github.com/aclements/go-gg/ggstat/ecdf.go index 2dcbb1b..46364a8 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/ggstat/ecdf.go +++ b/benchplot/vendor/github.com/aclements/go-gg/ggstat/ecdf.go @@ -42,25 +42,20 @@ type ECDF struct { // should be a plural noun. Label string - // Widen adjusts the domain of the returned ECDF. If Widen is - // not 1.0, ECDF will add a point below the smallest sample - // and above the largest sample to make the 0 and 1 levels - // clear. If Widen is 0, it is treated as 1.1 (that is, widen - // the domain by 10%, or 5% on the left and 5% on the right). - // - // TODO: Have a way to specify a specific range? - Widen float64 - - // SplitGroups indicates that each group in the table should - // have separate bounds based on the data in that group alone. - // The default, false, indicates that the bounds should be - // based on all of the data in the table combined. This makes - // it possible to stack ECDFs and easier to compare them - // across groups. - SplitGroups bool + // Domain specifies the domain of the returned ECDF. If the + // domain is wider than the bounds of the data in a group, + // ECDF will add a point below the smallest sample and above + // the largest sample to make the 0 and 1 levels clear. If + // Domain is nil, it defaults to DomainData{}. + Domain FunctionDomainer } func (s ECDF) F(g table.Grouping) table.Grouping { + // Set defaults. + if s.Domain == nil { + s.Domain = DomainData{} + } + // Construct output column names. dname, cname := "cumulative density", "cumulative count" if s.Label != "" { @@ -71,30 +66,31 @@ func (s ECDF) F(g table.Grouping) table.Grouping { } g = table.SortBy(g, s.X) - if s.Widen <= 1.0 && s.Widen != 0 { - // Disallow narrowing, since this isn't a continuous - // function. - s.Widen = 1.0 - } - col := getCol(g, s.X, s.Widen, s.SplitGroups) + domain := s.Domain.FunctionDomain(g, s.X) + return table.MapTables(g, func(gid table.GroupID, t *table.Table) *table.Table { // Get input columns. - var ws []float64 - xs := col[gid].data + var xs, ws []float64 + slice.Convert(&xs, t.MustColumn(s.X)) if s.W != "" { slice.Convert(&ws, t.MustColumn(s.W)) } // Ignore empty tables. 
if len(xs) == 0 { - return new(table.Builder).Add(s.X, []float64{}).Add(cname, []float64{}).Add(dname, []float64{}).Done() + nt := new(table.Builder).Add(s.X, []float64{}).Add(cname, []float64{}).Add(dname, []float64{}) + preserveConsts(nt, t) + return nt.Done() } + // Get domain. + min, max := domain(gid) + // Create output columns. xo, do, co := make([]float64, 0), make([]float64, 0), make([]float64, 0) - if s.Widen != 1.0 { + if min < xs[0] { // Extend to the left. - xo = append(xo, col[gid].min) + xo = append(xo, min) do = append(do, 0) co = append(co, 0) } @@ -127,9 +123,9 @@ func (s ECDF) F(g table.Grouping) table.Grouping { i = j } - if s.Widen != 1.0 { + if xs[len(xs)-1] < max { // Extend to the right. - xo = append(xo, col[gid].max) + xo = append(xo, max) do = append(do, 1) co = append(co, cum) } diff --git a/benchplot/vendor/github.com/aclements/go-gg/ggstat/fn.go b/benchplot/vendor/github.com/aclements/go-gg/ggstat/fn.go index d4d0509..0a4de33 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/ggstat/fn.go +++ b/benchplot/vendor/github.com/aclements/go-gg/ggstat/fn.go @@ -6,72 +6,118 @@ package ggstat import ( "math" + "reflect" "github.com/aclements/go-gg/generic/slice" "github.com/aclements/go-gg/table" - "github.com/aclements/go-moremath/stats" + "github.com/aclements/go-moremath/vec" ) -type colInfo struct { - data []float64 - min, max float64 +// Function samples a continuous univariate function at N points in +// the domain computed by Domain. +// +// The result of Function binds column X to the X values at which the +// function is sampled and retains constant columns from the input. +// The computed function can add arbitrary columns for its output. +type Function struct { + // X is the name of the column to use for input domain of this + // function. + X string + + // N is the number of points to sample the function at. If N + // is 0, a reasonable default is used. + N int + + // Domain specifies the domain of which to sample this function. + // If Domain is nil, it defaults to DomainData{}. + Domain FunctionDomainer + + // Fn is the continuous univariate function to sample. Fn will + // be called with each table in the grouping and the X values + // at which it should be sampled. Fn must add its output + // columns to out. The output table will already contain the + // sample points bound to the X column. + Fn func(gid table.GroupID, in *table.Table, sampleAt []float64, out *table.Builder) } -// getCol extracts column x from each group, converts it to []float64, -// and finds its bounds. -// -// TODO: Maybe this should be a callback interface to avoid building -// the map and holding on to so much allocation? -func getCol(g table.Grouping, x string, widen float64, splitGroups bool) map[table.GroupID]colInfo { - if widen <= 0 { - widen = 1.1 - } +const defaultFunctionSamples = 200 - col := make(map[table.GroupID]colInfo) - - if !splitGroups { - // Compute combined bounds. - min, max := math.NaN(), math.NaN() - for _, gid := range g.Tables() { - var xs []float64 - t := g.Table(gid) - slice.Convert(&xs, t.MustColumn(x)) - xmin, xmax := stats.Bounds(xs) - if xmin < min || math.IsNaN(min) { - min = xmin - } - if xmax > max || math.IsNaN(max) { - max = xmax - } - col[gid] = colInfo{xs, 0, 0} - } +func (f Function) F(g table.Grouping) table.Grouping { + // Set defaults. + if f.N <= 0 { + f.N = defaultFunctionSamples + } + if f.Domain == nil { + f.Domain = DomainData{} + } - // Widen bounds. 
- span := max - min - min, max = min-span*(widen-1)/2, max+span*(widen-1)/2 + domain := f.Domain.FunctionDomain(g, f.X) + return table.MapTables(g, func(gid table.GroupID, t *table.Table) *table.Table { + min, max := domain(gid) - for gid, info := range col { - info.min, info.max = min, max - col[gid] = info + // Compute sample points. If there's no data, there + // are no sample points, but we still have to run the + // function to get the right output columns. + var ss []float64 + if math.IsNaN(min) { + ss = []float64{} + } else { + ss = vec.Linspace(min, max, f.N) } - return col - } + var nt table.Builder + ctype := table.ColType(t, f.X) + if ctype == float64Type { + // Bind output X column. + nt.Add(f.X, ss) + } else { + // Convert to the column type. + vsp := reflect.New(ctype) + slice.Convert(vsp.Interface(), ss) + vs := vsp.Elem() + // This may have produced duplicate values. + // Eliminate those. + if vs.Len() > 0 { + prev, i := vs.Index(0).Interface(), 1 + for j := 1; j < vs.Len(); j++ { + next := vs.Index(j).Interface() + if prev == next { + // Skip duplicate. + continue + } - // Find bounds for each group separately. - for _, gid := range g.Tables() { - t := g.Table(gid) + if i != j { + vs.Index(i).Set(vs.Index(j)) + } + i++ + prev = next + } + vs.SetLen(i) + } + // Bind column-typed values to output X. + nt.Add(f.X, vs.Interface()) + // And convert back to []float64 so we can + // apply the function. + slice.Convert(&ss, vs.Interface()) + } - // Compute bounds. - var xs []float64 - slice.Convert(&xs, t.MustColumn(x)) - min, max := stats.Bounds(xs) + // Apply the function to the sample points. + f.Fn(gid, t, ss, &nt) - // Widen bounds. - span := max - min - min, max = min-span*(widen-1)/2, max+span*(widen-1)/2 + preserveConsts(&nt, t) + return nt.Done() + }) +} - col[gid] = colInfo{xs, min, max} +// preserveConsts copies the constant columns from t into nt. +func preserveConsts(nt *table.Builder, t *table.Table) { + for _, col := range t.Columns() { + if nt.Has(col) { + // Don't overwrite existing columns in nt. + continue + } + if cv, ok := t.Const(col); ok { + nt.AddConst(col, cv) + } } - return col } diff --git a/benchplot/vendor/github.com/aclements/go-gg/ggstat/loess.go b/benchplot/vendor/github.com/aclements/go-gg/ggstat/loess.go index d57cee4..e516ab4 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/ggstat/loess.go +++ b/benchplot/vendor/github.com/aclements/go-gg/ggstat/loess.go @@ -5,12 +5,9 @@ package ggstat import ( - "math" - "github.com/aclements/go-gg/generic/slice" "github.com/aclements/go-gg/table" "github.com/aclements/go-moremath/fit" - "github.com/aclements/go-moremath/stats" "github.com/aclements/go-moremath/vec" ) @@ -39,19 +36,9 @@ type LOESS struct { // is 0, a reasonable default is used. N int - // Widen sets the domain of the returned LOESS sample points - // to Widen times the span of the data. If Widen is 0, it is - // treated as 1.1 (that is, widen the domain by 10%, or 5% on - // the left and 5% on the right). - Widen float64 - - // SplitGroups indicates that each group in the table should - // have separate bounds based on the data in that group alone. - // The default, false, indicates that the bounds should be - // based on all of the data in the table combined. This makes - // it possible to stack LOESS fits and easier to compare them - // across groups. - SplitGroups bool + // Domain specifies the domain at which to sample this function. + // If Domain is nil, it defaults to DomainData{}. 
+ Domain FunctionDomainer // Degree specifies the degree of the local fit function. If // it is 0, it is treated as 2. @@ -71,106 +58,23 @@ func (s LOESS) F(g table.Grouping) table.Grouping { s.Span = 0.5 } - evals := evalPoints(g, s.X, s.N, s.Widen, s.SplitGroups) - var xs, ys []float64 - return table.MapTables(g, func(gid table.GroupID, t *table.Table) *table.Table { - if t.Len() == 0 { - nt := new(table.Builder).Add(s.X, []float64{}).Add(s.Y, []float64{}) - preserveConsts(nt, t) - return nt.Done() - } - - // TODO: We potentially convert each X column twice, - // since evalPoints also has to convert them. - slice.Convert(&xs, t.MustColumn(s.X)) - slice.Convert(&ys, t.MustColumn(s.Y)) - eval := evals[gid] - - loess := fit.LOESS(xs, ys, s.Degree, s.Span) - nt := new(table.Builder).Add(s.X, eval).Add(s.Y, vec.Map(loess, eval)) - preserveConsts(nt, t) - return nt.Done() - }) -} - -// TODO: Rethink evalPoints/preserveConsts. We probably want an -// interface for "functions" in the mathematical sense that knows how -// to evaluate them at reasonable points and bundle their results into -// a table. OTOH, ECDF uses parts of these, but we don't want to -// evaluate that at regular intervals. - -func evalPoints(g table.Grouping, x string, n int, widen float64, splitGroups bool) map[table.GroupID][]float64 { - var xs []float64 - res := map[table.GroupID][]float64{} - - if n <= 0 { - n = 200 - } - if widen <= 0 { - widen = 1.1 - } - - if !splitGroups { - // Compute combined bounds. - min, max := math.NaN(), math.NaN() - for _, gid := range g.Tables() { - t := g.Table(gid) - slice.Convert(&xs, t.MustColumn(x)) - xmin, xmax := stats.Bounds(xs) - if xmin < min || math.IsNaN(min) { - min = xmin - } - if xmax > max || math.IsNaN(max) { - max = xmax + return Function{ + X: s.X, N: s.N, Domain: s.Domain, + Fn: func(gid table.GroupID, in *table.Table, sampleAt []float64, out *table.Builder) { + if len(sampleAt) == 0 { + out.Add(s.Y, []float64{}) + return } - } - - // Widen bounds. - span := max - min - min, max = min-span*(widen-1)/2, max+span*(widen-1)/2 - - // Create evaluation points. Careful if there's no data. - var eval []float64 - if !math.IsNaN(min) { - eval = vec.Linspace(min, max, n) - } - for _, gid := range g.Tables() { - res[gid] = eval - } - return res - } - - for _, gid := range g.Tables() { - t := g.Table(gid) - // Compute bounds. - slice.Convert(&xs, t.MustColumn(x)) - min, max := stats.Bounds(xs) + // TODO: We potentially convert each X column + // twice, since Function also has to convert + // them. + slice.Convert(&xs, in.MustColumn(s.X)) + slice.Convert(&ys, in.MustColumn(s.Y)) - // Widen bounds. - span := max - min - min, max = min-span*(widen-1)/2, max+span*(widen-1)/2 - - // Create evaluation points. Careful if there's no data. - var eval []float64 - if !math.IsNaN(min) { - eval = vec.Linspace(min, max, n) - } - res[gid] = eval - } - return res -} - -// preserveConsts copies the constant columns from t into nt. -func preserveConsts(nt *table.Builder, t *table.Table) { - for _, col := range t.Columns() { - if nt.Has(col) { - // Don't overwrite existing columns in nt. 
- continue - } - if cv, ok := t.Const(col); ok { - nt.AddConst(col, cv) - } - } + loess := fit.LOESS(xs, ys, s.Degree, s.Span) + out.Add(s.Y, vec.Map(loess, sampleAt)) + }, + }.F(g) } diff --git a/benchplot/vendor/github.com/aclements/go-gg/ggstat/lsquares.go b/benchplot/vendor/github.com/aclements/go-gg/ggstat/lsquares.go index cb65bd8..53c03b5 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/ggstat/lsquares.go +++ b/benchplot/vendor/github.com/aclements/go-gg/ggstat/lsquares.go @@ -11,9 +11,6 @@ import ( "github.com/aclements/go-moremath/vec" ) -// TODO: Should this keep the type of X and Y the same if they aren't -// just []float64? - // LeastSquares constructs a least squares polynomial regression for // the data (X, Y). // @@ -37,21 +34,9 @@ type LeastSquares struct { // is 0, a reasonable default is used. N int - // Widen sets the domain of the returned LOESS sample points - // to Widen times the span of the data. If Widen is 0, it is - // treated as 1.1 (that is, widen the domain by 10%, or 5% on - // the left and 5% on the right). - // - // TODO: Have a way to specify a specific range? - Widen float64 - - // SplitGroups indicates that each group in the table should - // have separate bounds based on the data in that group alone. - // The default, false, indicates that the bounds should be - // based on all of the data in the table combined. This makes - // it possible to stack LOESS fits and easier to compare them - // across groups. - SplitGroups bool + // Domain specifies the domain at which to sample this function. + // If Domain is nil, it defaults to DomainData{}. + Domain FunctionDomainer // Degree specifies the degree of the fit polynomial. If it is // 0, it is treated as 1. @@ -63,25 +48,20 @@ func (s LeastSquares) F(g table.Grouping) table.Grouping { s.Degree = 1 } - evals := evalPoints(g, s.X, s.N, s.Widen, s.SplitGroups) - var xs, ys []float64 - return table.MapTables(g, func(gid table.GroupID, t *table.Table) *table.Table { - if t.Len() == 0 { - nt := new(table.Builder).Add(s.X, []float64{}).Add(s.Y, []float64{}) - preserveConsts(nt, t) - return nt.Done() - } + return Function{ + X: s.X, N: s.N, Domain: s.Domain, + Fn: func(gid table.GroupID, in *table.Table, sampleAt []float64, out *table.Builder) { + if len(sampleAt) == 0 { + out.Add(s.Y, []float64{}) + return + } - // TODO: We potentially convert each X column twice, - // since evalPoints also has to convert them. - slice.Convert(&xs, t.MustColumn(s.X)) - slice.Convert(&ys, t.MustColumn(s.Y)) - eval := evals[gid] + slice.Convert(&xs, in.MustColumn(s.X)) + slice.Convert(&ys, in.MustColumn(s.Y)) - r := fit.PolynomialRegression(xs, ys, nil, s.Degree) - nt := new(table.Builder).Add(s.X, eval).Add(s.Y, vec.Map(r.F, eval)) - preserveConsts(nt, t) - return nt.Done() - }) + r := fit.PolynomialRegression(xs, ys, nil, s.Degree) + out.Add(s.Y, vec.Map(r.F, sampleAt)) + }, + }.F(g) } diff --git a/benchplot/vendor/github.com/aclements/go-gg/ggstat/normalize.go b/benchplot/vendor/github.com/aclements/go-gg/ggstat/normalize.go index db18ef5..4644246 100644 --- a/benchplot/vendor/github.com/aclements/go-gg/ggstat/normalize.go +++ b/benchplot/vendor/github.com/aclements/go-gg/ggstat/normalize.go @@ -37,10 +37,17 @@ type Normalize struct { By interface{} // Cols is a slice of the names of columns to normalize - // relative to their value in the denominator row. Cols may be - // nil, in which case it defaults to all integral and floating - // point columns. 
+	// relative to the corresponding DenomCols value in the
+	// denominator row. Cols may be nil, in which case it defaults
+	// to all integral and floating point columns.
 	Cols []string
+
+	// DenomCols is a slice of the names of columns used as the
+	// denominator. DenomCols may be nil, in which case it
+	// defaults to Cols (i.e. each column will be normalized to
+	// the value from that column in the denominator row).
+	// Otherwise, DenomCols must be the same length as Cols.
+	DenomCols []string
 }
 
 func (s Normalize) F(g table.Grouping) table.Grouping {
@@ -97,8 +104,13 @@ func (s Normalize) F(g table.Grouping) table.Grouping {
 
 	// Normalize columns.
 	newt := table.NewBuilder(t)
+	denomCols := s.DenomCols
+	if denomCols == nil {
+		denomCols = s.Cols
+	}
 	for coli, col := range s.Cols {
-		out := normalizeTo(t.MustColumn(col), drow)
+		denom := denomValue(t.MustColumn(denomCols[coli]), drow)
+		out := normalizeTo(t.MustColumn(col), denom)
 		newt.Add(newcols[coli], out)
 	}
 
@@ -134,10 +146,17 @@ func canNormalize(k reflect.Kind) bool {
 	return canNormalizeKinds[k]
 }
 
-func normalizeTo(s interface{}, index int) interface{} {
+func denomValue(s interface{}, index int) float64 {
+	switch s := s.(type) {
+	case []float64:
+		return s[index]
+	}
+	return reflect.ValueOf(s).Index(index).Convert(float64Type).Float()
+}
+
+func normalizeTo(s interface{}, denom float64) interface{} {
 	switch s := s.(type) {
 	case []float64:
-		denom := s[index]
 		out := make([]float64, len(s))
 		for i, numer := range s {
 			out[i] = numer / denom
@@ -146,7 +165,6 @@ func normalizeTo(s interface{}, index int) interface{} {
 	}
 
 	sv := reflect.ValueOf(s)
-	denom := sv.Index(index).Convert(float64Type).Float()
 	out := reflect.MakeSlice(float64SliceType, sv.Len(), sv.Len())
 	for i, len := 0, sv.Len(); i < len; i++ {
diff --git a/benchplot/vendor/github.com/aclements/go-moremath/scale/linear.go b/benchplot/vendor/github.com/aclements/go-moremath/scale/linear.go
index 4309004..051a6c2 100644
--- a/benchplot/vendor/github.com/aclements/go-moremath/scale/linear.go
+++ b/benchplot/vendor/github.com/aclements/go-moremath/scale/linear.go
@@ -105,6 +105,34 @@ func (s *Linear) spacingAtLevel(level int, roundOut bool) (firstN, lastN, spacin
 	return
 }
 
+// CountTicks returns the number of ticks in [s.Min, s.Max] at the
+// given tick level.
+func (s Linear) CountTicks(level int) int {
+	return linearTicker{&s, false}.CountTicks(level)
+}
+
+// TicksAtLevel returns the tick locations in [s.Min, s.Max] as a
+// []float64 at the given tick level in ascending order.
+func (s Linear) TicksAtLevel(level int) interface{} {
+	return linearTicker{&s, false}.TicksAtLevel(level)
+}
+
+type linearTicker struct {
+	s        *Linear
+	roundOut bool
+}
+
+func (t linearTicker) CountTicks(level int) int {
+	firstN, lastN, _ := t.s.spacingAtLevel(level, t.roundOut)
+	return int(lastN - firstN + 1)
+}
+
+func (t linearTicker) TicksAtLevel(level int) interface{} {
+	firstN, lastN, spacing := t.s.spacingAtLevel(level, t.roundOut)
+	n := int(lastN - firstN + 1)
+	return vec.Linspace(firstN*spacing, lastN*spacing, n)
+}
+
 func (s Linear) Ticks(o TickOptions) (major, minor []float64) {
 	if o.Max <= 0 {
 		return nil, nil
@@ -114,24 +142,11 @@ func (s Linear) Ticks(o TickOptions) (major, minor []float64) {
 		s.Min, s.Max = s.Max, s.Min
 	}
 
-	// nticksAtLevel returns the number of ticks in [s.Min, s.Max]
-	// at the given level.
- nticksAtLevel := func(level int) int { - firstN, lastN, _ := s.spacingAtLevel(level, false) - return int(lastN - firstN + 1) - } - - ticksAtLevel := func(level int) []float64 { - firstN, lastN, spacing := s.spacingAtLevel(level, false) - n := int(lastN - firstN + 1) - return vec.Linspace(firstN*spacing, lastN*spacing, n) - } - - level, ok := o.FindLevel(nticksAtLevel, ticksAtLevel, s.guessLevel()) + level, ok := o.FindLevel(linearTicker{&s, false}, s.guessLevel()) if !ok { return nil, nil } - return ticksAtLevel(level), ticksAtLevel(level - 1) + return s.TicksAtLevel(level).([]float64), s.TicksAtLevel(level - 1).([]float64) } func (s *Linear) Nice(o TickOptions) { @@ -142,18 +157,7 @@ func (s *Linear) Nice(o TickOptions) { s.Min, s.Max = s.Max, s.Min } - nticksAtLevel := func(level int) int { - firstN, lastN, _ := s.spacingAtLevel(level, true) - return int(lastN - firstN + 1) - } - - ticksAtLevel := func(level int) []float64 { - firstN, lastN, spacing := s.spacingAtLevel(level, true) - n := int(lastN - firstN + 1) - return vec.Linspace(firstN*spacing, lastN*spacing, n) - } - - level, ok := o.FindLevel(nticksAtLevel, ticksAtLevel, s.guessLevel()) + level, ok := o.FindLevel(linearTicker{s, true}, s.guessLevel()) if !ok { return } diff --git a/benchplot/vendor/github.com/aclements/go-moremath/scale/log.go b/benchplot/vendor/github.com/aclements/go-moremath/scale/log.go index 872ff56..d9f84ed 100644 --- a/benchplot/vendor/github.com/aclements/go-moremath/scale/log.go +++ b/benchplot/vendor/github.com/aclements/go-moremath/scale/log.go @@ -130,58 +130,64 @@ func (s *Log) spacingAtLevel(level int, roundOut bool) (firstN, lastN, ebase flo return } -func (s *Log) tickFuncs(roundOut bool) (func(level int) int, func(level int) []float64) { - neg, min, max := s.ebounds() +func (s *Log) CountTicks(level int) int { + return logTicker{s, false}.CountTicks(level) +} - // nticksAtLevel returns the number of ticks in [min, max] at - // the given level. - nticksAtLevel := func(level int) int { - if level < 0 { - const maxInt = int(^uint(0) >> 1) - return maxInt - } +func (s *Log) TicksAtLevel(level int) interface{} { + return logTicker{s, false}.TicksAtLevel(level) +} - firstN, lastN, _ := s.spacingAtLevel(level, roundOut) - return int(lastN - firstN + 1) - } - - ticksAtLevel := func(level int) []float64 { - ticks := []float64{} - - if level < 0 { - // Minor ticks for level 0. Get the major - // ticks, but round out so we can fill in - // minor ticks outside of the major ticks. - firstN, lastN, _ := s.spacingAtLevel(0, true) - for n := firstN; n <= lastN; n++ { - tick := math.Pow(float64(s.Base), n) - step := tick - for i := 0; i < s.Base-1; i++ { - if min <= tick && tick <= max { - ticks = append(ticks, tick) - } - tick += step +type logTicker struct { + s *Log + roundOut bool +} + +func (t logTicker) CountTicks(level int) int { + if level < 0 { + const maxInt = int(^uint(0) >> 1) + return maxInt + } + + firstN, lastN, _ := t.s.spacingAtLevel(level, t.roundOut) + return int(lastN - firstN + 1) +} + +func (t logTicker) TicksAtLevel(level int) interface{} { + neg, min, max := t.s.ebounds() + ticks := []float64{} + + if level < 0 { + // Minor ticks for level 0. Get the major + // ticks, but round out so we can fill in + // minor ticks outside of the major ticks. 
+ firstN, lastN, _ := t.s.spacingAtLevel(0, true) + for n := firstN; n <= lastN; n++ { + tick := math.Pow(float64(t.s.Base), n) + step := tick + for i := 0; i < t.s.Base-1; i++ { + if min <= tick && tick <= max { + ticks = append(ticks, tick) } - } - } else { - firstN, lastN, base := s.spacingAtLevel(level, roundOut) - for n := firstN; n <= lastN; n++ { - ticks = append(ticks, math.Pow(base, n)) + tick += step } } - - if neg { - // Negate and reverse order of ticks. - for i := 0; i < (len(ticks)+1)/2; i++ { - j := len(ticks) - i - 1 - ticks[i], ticks[j] = -ticks[j], -ticks[i] - } + } else { + firstN, lastN, base := t.s.spacingAtLevel(level, t.roundOut) + for n := firstN; n <= lastN; n++ { + ticks = append(ticks, math.Pow(base, n)) } + } - return ticks + if neg { + // Negate and reverse order of ticks. + for i := 0; i < (len(ticks)+1)/2; i++ { + j := len(ticks) - i - 1 + ticks[i], ticks[j] = -ticks[j], -ticks[i] + } } - return nticksAtLevel, ticksAtLevel + return ticks } func (s Log) Ticks(o TickOptions) (major, minor []float64) { @@ -190,13 +196,13 @@ func (s Log) Ticks(o TickOptions) (major, minor []float64) { } else if s.Min == s.Max { return []float64{s.Min}, []float64{s.Max} } - count, ticks := s.tickFuncs(false) + t := logTicker{&s, false} - level, ok := o.FindLevel(count, ticks, 0) + level, ok := o.FindLevel(t, 0) if !ok { return nil, nil } - return ticks(level), ticks(level - 1) + return t.TicksAtLevel(level).([]float64), t.TicksAtLevel(level - 1).([]float64) } func (s *Log) Nice(o TickOptions) { @@ -204,9 +210,9 @@ func (s *Log) Nice(o TickOptions) { return } neg, _, _ := s.ebounds() - count, ticks := s.tickFuncs(true) + t := logTicker{s, true} - level, ok := o.FindLevel(count, ticks, 0) + level, ok := o.FindLevel(t, 0) if !ok { return } diff --git a/benchplot/vendor/github.com/aclements/go-moremath/scale/ticks.go b/benchplot/vendor/github.com/aclements/go-moremath/scale/ticks.go index 955bcff..8edf548 100644 --- a/benchplot/vendor/github.com/aclements/go-moremath/scale/ticks.go +++ b/benchplot/vendor/github.com/aclements/go-moremath/scale/ticks.go @@ -20,37 +20,40 @@ type TickOptions struct { // levels to accept, respectively. If they are both 0, there is // no limit on acceptable tick levels. MinLevel, MaxLevel int +} + +// A Ticker computes tick marks for a scale. The "level" of the ticks +// controls how many ticks there are and how closely they are spaced. +// Higher levels have fewer ticks, while lower levels have more ticks. +// For example, on a numerical scale, one could have ticks at every +// n*(10^level). +type Ticker interface { + // CountTicks returns the number of ticks at level in this + // scale's input range. This is equivalent to + // len(TicksAtLevel(level)), but should be much more + // efficient. CountTicks is a weakly monotonically decreasing + // function of level. + CountTicks(level int) int - // Pred returns true if ticks is an acceptable set of major - // ticks. ticks will be in increasing order. Pred must be - // "monotonic" in level in the following sense: if Pred is - // false for level l (or ticks t), it must be false for all l' - // < l (or len(t') > len(t)), and if Pred is true for level l - // (or ticks t), it must be true for all l' > l (or len(t') < - // len(t)). In other words, Pred should return false if there - // are "too many" ticks or they are "too close together". - // - // If Pred is nil, it is assumed to always be satisfied. 
- Pred func(ticks []float64, level int) bool + // TicksAtLevel returns a slice of "nice" tick values in + // increasing order at level in this scale's input range. + // Typically, TicksAtLevel(l+1) is a subset of + // TicksAtLevel(l). That is, higher levels remove ticks from + // lower levels. + TicksAtLevel(level int) interface{} } // FindLevel returns the lowest level that satisfies the constraints // given by o: // -// * count(level) <= o.Max +// * ticker.CountTicks(level) <= o.Max // // * o.MinLevel <= level <= o.MaxLevel (if MinLevel and MaxLevel != 0). // -// * o.Pred(ticks(level), level) is true (if o.Pred != nil). -// // If the constraints cannot be satisfied, it returns 0, false. // -// ticks(level) must return the tick marks at level in increasing -// order. count(level) must return len(ticks(level)), but should do so -// without constructing the ticks array because it may be very large. -// count must be a weakly monotonically decreasing function of level. // guess is the level to start the optimization at. -func (o *TickOptions) FindLevel(count func(level int) int, ticks func(level int) []float64, guess int) (int, bool) { +func (o *TickOptions) FindLevel(ticker Ticker, guess int) (int, bool) { minLevel, maxLevel := o.MinLevel, o.MaxLevel if minLevel == 0 && maxLevel == 0 { minLevel, maxLevel = -1000, 1000 @@ -70,12 +73,12 @@ func (o *TickOptions) FindLevel(count func(level int) int, ticks func(level int) } // Optimize count against o.Max. - if count(l) <= o.Max { + if ticker.CountTicks(l) <= o.Max { // We're satisfying the o.Max and min/maxLevel // constraints. count is monotonically decreasing, so // decrease level to increase the count until we // violate either o.Max or minLevel. - for l--; l >= minLevel && count(l) <= o.Max; l-- { + for l--; l >= minLevel && ticker.CountTicks(l) <= o.Max; l-- { } // We went one too far. l++ @@ -83,7 +86,7 @@ func (o *TickOptions) FindLevel(count func(level int) int, ticks func(level int) // We're over o.Max. Increase level to decrease the // count until we go below o.Max. This may cause us to // violate maxLevel. - for l++; l <= maxLevel && count(l) > o.Max; l++ { + for l++; l <= maxLevel && ticker.CountTicks(l) > o.Max; l++ { } if l > maxLevel { // We can't satisfy both o.Max and maxLevel. @@ -94,18 +97,5 @@ func (o *TickOptions) FindLevel(count func(level int) int, ticks func(level int) // At this point l is the lowest value that satisfies the // o.Max, minLevel, and maxLevel constraints. - // Optimize ticks against o.Pred. - if o.Pred != nil { - // Increase level until Pred is satisfied. This may - // cause us to violate maxLevel. - for l <= maxLevel && !o.Pred(ticks(l), l) { - l++ - } - if l > maxLevel { - // We can't satisfy both maxLevel and Pred. 
- return 0, false - } - } - return l, true } diff --git a/benchplot/vendor/github.com/aclements/go-moremath/scale/ticks_test.go b/benchplot/vendor/github.com/aclements/go-moremath/scale/ticks_test.go index 7cdadd8..e7eca34 100644 --- a/benchplot/vendor/github.com/aclements/go-moremath/scale/ticks_test.go +++ b/benchplot/vendor/github.com/aclements/go-moremath/scale/ticks_test.go @@ -6,28 +6,32 @@ package scale import "testing" -func TestTicks(t *testing.T) { - count := func(level int) int { - c := 10 - level - if c < 1 { - c = 1 - } - return c +type testTicker struct{} + +func (testTicker) CountTicks(level int) int { + c := 10 - level + if c < 1 { + c = 1 } - ticks := func(level int) []float64 { - m := make([]float64, count(level)) - for i := 0; i < len(m); i++ { - m[i] = float64(i) - } - return m + return c +} + +func (t testTicker) TicksAtLevel(level int) interface{} { + m := make([]float64, t.CountTicks(level)) + for i := 0; i < len(m); i++ { + m[i] = float64(i) } + return m +} + +func TestTicks(t *testing.T) { check := func(o TickOptions, want int) { wantL, wantOK := want, true if want == -999 { wantL, wantOK = 0, false } for _, guess := range []int{0, -50, 50} { - l, ok := o.FindLevel(count, ticks, guess) + l, ok := o.FindLevel(testTicker{}, guess) if l != wantL || ok != wantOK { t.Errorf("%+v.FindLevel with guess %v returned %v, %v; wanted %v, %v", o, guess, l, ok, wantL, wantOK) } @@ -52,12 +56,4 @@ func TestTicks(t *testing.T) { check(TickOptions{Max: 6, MaxLevel: 9}, 4) check(TickOptions{Max: 6, MaxLevel: 3}, -999) check(TickOptions{Max: 6, MinLevel: 10, MaxLevel: 11}, 10) - - // Predicate always matches. - check(TickOptions{Max: 6, Pred: func(t []float64, level int) bool { return true }}, 4) - // Predicate matches in the middle of the satisfiable region. - check(TickOptions{Max: 6, Pred: func(t []float64, level int) bool { return level >= 6 }}, 6) - check(TickOptions{Max: 6, MinLevel: 5, MaxLevel: 1000, Pred: func(t []float64, level int) bool { return level >= 6 }}, 6) - // Predicate does not match in the satisfiable region. - check(TickOptions{Max: 6, MaxLevel: 5, Pred: func(t []float64, level int) bool { return level >= 6 }}, -999) }
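
A few usage sketches for the new APIs in this change follow. They assume the vendored go-gg and go-moremath packages above and that a bare *table.Table can stand in for a one-group table.Grouping, as it is used elsewhere in go-gg; every column name and helper type below (clampDomain, powTicker, "x", "y", "latency", "p50", "p99") is invented for illustration and is not part of the patch.

ggstat's new FunctionDomainer decouples "where to sample a function" from the stat that does the sampling: DomainData derives the domain from the bounds of the data and widens it (10% by default, optionally per group via SplitGroups), while DomainFixed pins it. A minimal sketch of a custom domainer that reuses DomainData but never lets the lower bound drop below a floor:

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/ggstat"
		"github.com/aclements/go-gg/table"
	)

	// clampDomain derives a domain from the data like DomainData, but
	// clamps the lower bound to Floor. Hypothetical; not part of the patch.
	type clampDomain struct {
		Floor float64
		Data  ggstat.DomainData
	}

	// Statically check that clampDomain satisfies FunctionDomainer.
	var _ ggstat.FunctionDomainer = clampDomain{}

	func (c clampDomain) FunctionDomain(g table.Grouping, col string) func(gid table.GroupID) (min, max float64) {
		inner := c.Data.FunctionDomain(g, col)
		return func(gid table.GroupID) (min, max float64) {
			min, max = inner(gid)
			if min < c.Floor {
				min = c.Floor
			}
			return min, max
		}
	}

	func main() {
		// One group, one float64 column "x".
		g := new(table.Builder).Add("x", []float64{1, 2, 3, 10}).Done()
		dom := clampDomain{Floor: 1, Data: ggstat.DomainData{}}.FunctionDomain(g, "x")
		for _, gid := range g.Tables() {
			fmt.Println(dom(gid)) // widened to (0.55, 10.45), then clamped to (1, 10.45)
		}
	}

Any stat that takes a Domain field (Function, ECDF, LOESS, LeastSquares) accepts such a domainer unchanged.
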
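ggstat.Function is the general "sample a curve over a domain" stat that LOESS and LeastSquares now delegate to: it computes the sample points from Domain and N, binds them to the X column (converting back to the column's type and dropping duplicates when X is not []float64), calls Fn to add the output columns, and preserves constant columns. A sketch that tabulates sqrt(x) over a fixed window:

	package main

	import (
		"fmt"
		"math"

		"github.com/aclements/go-gg/ggstat"
		"github.com/aclements/go-gg/table"
		"github.com/aclements/go-moremath/vec"
	)

	func main() {
		// Toy input: one float64 column "x".
		g := new(table.Builder).Add("x", vec.Linspace(1, 100, 50)).Done()

		curve := ggstat.Function{
			X:      "x",
			N:      64, // number of sample points (default 200)
			Domain: ggstat.DomainFixed{Min: 0, Max: 100},
			Fn: func(gid table.GroupID, in *table.Table, sampleAt []float64, out *table.Builder) {
				// One output value per sample point.
				out.Add("sqrt x", vec.Map(math.Sqrt, sampleAt))
			},
		}.F(g)

		// 64 rows with columns "x" and "sqrt x", ready for a line layer.
		t := curve.Table(curve.Tables()[0])
		fmt.Println(t.Len(), t.Columns())
	}
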
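ECDF keeps its shape but takes the same Domain knob in place of Widen and SplitGroups; whenever the domain extends past a group's data, the curve is pinned at 0 on the left and 1 on the right, so stacked ECDFs share a common X range. A sketch using per-group domains:

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/ggstat"
		"github.com/aclements/go-gg/table"
	)

	func main() {
		g := new(table.Builder).
			Add("latency", []float64{3, 1, 4, 1, 5, 9, 2, 6}).
			Done()

		cdf := ggstat.ECDF{
			X: "latency",
			// Bound each group by its own data rather than the
			// combined bounds of all groups.
			Domain: ggstat.DomainData{SplitGroups: true},
		}.F(g)

		t := cdf.Table(cdf.Tables()[0])
		fmt.Println(t.Len(), t.Columns())
	}
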
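LOESS and LeastSquares likewise drop Widen and SplitGroups and simply forward X, N, and Domain to Function. A sketch of a quadratic least-squares fit evaluated over exactly [0, 10]:

	package main

	import (
		"fmt"

		"github.com/aclements/go-gg/ggstat"
		"github.com/aclements/go-gg/table"
		"github.com/aclements/go-moremath/vec"
	)

	func main() {
		xs := vec.Linspace(0, 10, 21)
		ys := vec.Map(func(x float64) float64 { return 2*x*x + 1 }, xs)
		g := new(table.Builder).Add("x", xs).Add("y", ys).Done()

		fit := ggstat.LeastSquares{
			X:      "x",
			Y:      "y",
			Degree: 2,
			// Fixed window; ggstat.DomainData{SplitGroups: true}
			// would instead give each group its own data-derived domain.
			Domain: ggstat.DomainFixed{Min: 0, Max: 10},
		}.F(g)

		t := fit.Table(fit.Tables()[0])
		fmt.Println(t.Len(), t.Columns()) // 200 sample rows, columns "x" and "y"
	}
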
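Normalize's new DenomCols lets several columns be scaled by one reference column taken from the denominator row; X and By still select that row exactly as before, and a nil DenomCols falls back to the old behavior of normalizing each column to itself. The arithmetic is plain per-column division, shown here as a standalone sketch rather than through the Normalize API:

	package main

	import "fmt"

	// normalizeSketch mirrors what Normalize computes within one group:
	// for each column i, every value of cols[i] is divided by the value
	// of denomCols[i] in the denominator row.
	func normalizeSketch(cols, denomCols [][]float64, denomRow int) [][]float64 {
		out := make([][]float64, len(cols))
		for i := range cols {
			denom := denomCols[i][denomRow]
			out[i] = make([]float64, len(cols[i]))
			for j, v := range cols[i] {
				out[i][j] = v / denom
			}
		}
		return out
	}

	func main() {
		// Normalize both metrics against the first metric's value in
		// row 0 -- Cols: {"p50", "p99"}, DenomCols: {"p50", "p50"} in
		// Normalize terms.
		p50 := []float64{10, 12, 9}
		p99 := []float64{40, 44, 38}
		fmt.Println(normalizeSketch([][]float64{p50, p99}, [][]float64{p50, p50}, 0))
		// [[1 1.2 0.9] [4 4.4 3.8]]
	}
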
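In go-moremath, tick generation is now expressed through the Ticker interface, which Linear and Log implement via their new CountTicks and TicksAtLevel methods; the old count/ticks callback pair and the Pred hook on TickOptions are gone. A sketch of driving TickOptions.FindLevel with a custom Ticker, modeled on the testTicker above; powTicker places ticks every 10^level and is invented for the example:

	package main

	import (
		"fmt"
		"math"

		"github.com/aclements/go-moremath/scale"
	)

	// powTicker puts ticks every 10^level across a fixed [min, max] range.
	// CountTicks is weakly monotonically decreasing in level, as FindLevel
	// requires. Illustrative only; not part of the patch.
	type powTicker struct{ min, max float64 }

	func (t powTicker) CountTicks(level int) int {
		return len(t.TicksAtLevel(level).([]float64))
	}

	func (t powTicker) TicksAtLevel(level int) interface{} {
		spacing := math.Pow(10, float64(level))
		ticks := []float64{}
		for x := math.Ceil(t.min/spacing) * spacing; x <= t.max; x += spacing {
			ticks = append(ticks, x)
		}
		return ticks
	}

	func main() {
		t := powTicker{min: 0, max: 100}
		o := scale.TickOptions{Max: 6}
		if level, ok := o.FindLevel(t, 0); ok {
			fmt.Println(level, t.TicksAtLevel(level)) // 2 [0 100]
		}
	}
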