From 738a504ad369bd9c3ed637dca6cbdab0b93c809f Mon Sep 17 00:00:00 2001
From: Thomas Stromberg
Date: Tue, 8 Oct 2013 15:29:58 -0700
Subject: [PATCH] "uniq" history entries before using them as a query source,
 as a history may contain dozens of subsequent entries for the same hostname.

---
 history/filter.go  | 41 ++++++++++++++++++++++++++++++++++++-----
 history/history.go |  2 +-
 ui/ui.go           |  2 +-
 3 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/history/filter.go b/history/filter.go
index 334651d..a3aab36 100644
--- a/history/filter.go
+++ b/history/filter.go
@@ -4,6 +4,7 @@ package history
 import (
 	"code.google.com/p/go.net/publicsuffix"
 	"log"
+	"math/rand"
 	"net/url"
 	"regexp"
 )
@@ -26,8 +27,8 @@ func isPossiblyInternal(addr string) bool {
 	return false
 }
 
-// Filter out external hostnames from history, with a limit of X records (may be 0).
-func ExternalHostnames(entries []string, limit int) (hostnames []string) {
+// Filter out external hostnames from history
+func ExternalHostnames(entries []string) (hostnames []string) {
 	counter := 0
 
 	for _, uString := range entries {
@@ -38,11 +39,41 @@ func ExternalHostnames(entries []string, limit int) (hostnames []string) {
 		}
 		if !isPossiblyInternal(u.Host) {
 			counter += 1
-			if limit > 0 && counter > limit {
-				return
-			}
 			hostnames = append(hostnames, u.Host)
 		}
 	}
 	return
 }
+
+// Uniq collapses each run of consecutive identical entries into a single
+// entry, like uniq(1). Duplicates that are not adjacent are kept, and the
+// input slice is left untouched.
+func Uniq(input []string) (output []string) {
+	for idx, entry := range input {
+		// Compare against the previous element rather than a "" sentinel so
+		// that a leading empty string is not silently dropped.
+		if idx == 0 || entry != input[idx-1] {
+			output = append(output, entry)
+		}
+	}
+	return output
+}
+
+// Random returns up to count entries of input, chosen at random without
+// picking the same index twice. When input holds fewer than count entries
+// all of them are returned (in random order); a non-positive count or an
+// empty input yields nil.
+func Random(count int, input []string) (output []string) {
+	if count > len(input) {
+		count = len(input)
+	}
+	if count <= 0 {
+		return nil
+	}
+	// A permutation of the indexes never repeats, so there is nothing to
+	// re-roll and the loop is guaranteed to terminate.
+	for _, index := range rand.Perm(len(input))[:count] {
+		output = append(output, input[index])
+	}
+	return output
+}
diff --git a/history/history.go b/history/history.go
index e455c1a..37f8c3e 100644
--- a/history/history.go
+++ b/history/history.go
@@ -47,7 +47,7 @@ func Chrome(days int) (urls []string, err error) {
 		LEFT JOIN urls ON visits.url = urls.id
 		WHERE (visit_time - 11644473600000000 >
 			strftime('%%s', date('now', '-%d day')) * 1000000)
-		ORDER BY RANDOM();`, days)
+		ORDER BY visit_time DESC`, days)
 
 	for _, p := range paths {
 		path := os.ExpandEnv(p)
diff --git a/ui/ui.go b/ui/ui.go
index 927c523..cc83b8f 100644
--- a/ui/ui.go
+++ b/ui/ui.go
@@ -30,7 +30,7 @@ func Index(w http.ResponseWriter, r *http.Request) {
 	}
 
 	q := dnsqueue.StartQueue(QUEUE_LENGTH, WORKERS)
-	hostnames := history.ExternalHostnames(records, 64)
+	hostnames := history.Random(16, history.Uniq(history.ExternalHostnames(records)))
 
 	for _, record := range hostnames {
 		q.Add("8.8.8.8:53", "A", record+".")