Skip to content

Commit

Permalink
"uniq" history entries before using them as a query source, as a history
Browse files Browse the repository at this point in the history
may contain dozens of consecutive entries for the same hostname.
  • Loading branch information
tstromberg committed Oct 8, 2013
1 parent 232afac commit 738a504
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 7 deletions.
37 changes: 32 additions & 5 deletions history/filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package history
import (
"code.google.com/p/go.net/publicsuffix"
"log"
"math/rand"
"net/url"
"regexp"
)
Expand All @@ -26,8 +27,8 @@ func isPossiblyInternal(addr string) bool {
return false
}

// Filter out external hostnames from history, with a limit of X records (may be 0).
func ExternalHostnames(entries []string, limit int) (hostnames []string) {
// Extract the external (not possibly-internal) hostnames from history entries.
func ExternalHostnames(entries []string) (hostnames []string) {
counter := 0

for _, uString := range entries {
Expand All @@ -38,11 +39,37 @@ func ExternalHostnames(entries []string, limit int) (hostnames []string) {
}
if !isPossiblyInternal(u.Host) {
counter += 1
if limit > 0 && counter > limit {
return
}
hostnames = append(hostnames, u.Host)
}
}
return
}

// Uniq collapses each run of consecutive identical entries in input into a
// single entry, in the manner of the Unix uniq command. Duplicates that are
// not adjacent are kept, and the relative order of the surviving entries is
// unchanged. An empty or nil input yields a nil slice.
func Uniq(input []string) (output []string) {
	for i, entry := range input {
		// Compare against the previous element rather than a "" sentinel so
		// that an empty string is treated like any other value.
		if i > 0 && entry == input[i-1] {
			continue
		}
		output = append(output, entry)
	}
	return output
}

// Random returns up to count entries chosen at random from input, without
// picking the same position twice.
//
// Selection is by index, so if input holds the same value at several
// positions that value may appear more than once in the result. When count
// exceeds len(input) every entry is returned (in random order) instead of
// looping forever looking for unused indexes. A non-positive count or an
// empty input yields a nil slice.
func Random(count int, input []string) (output []string) {
	// Clamp so we never ask for more distinct indexes than exist.
	if count > len(input) {
		count = len(input)
	}
	if count <= 0 {
		return nil
	}

	output = make([]string, 0, count)
	// A random permutation of the indexes gives distinct picks directly,
	// with no re-rolling on collisions.
	for _, index := range rand.Perm(len(input))[:count] {
		output = append(output, input[index])
	}
	return output
}
2 changes: 1 addition & 1 deletion history/history.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func Chrome(days int) (urls []string, err error) {
LEFT JOIN urls ON visits.url = urls.id
WHERE (visit_time - 11644473600000000 >
strftime('%%s', date('now', '-%d day')) * 1000000)
ORDER BY RANDOM();`, days)
ORDER BY visit_time DESC`, days)

for _, p := range paths {
path := os.ExpandEnv(p)
Expand Down
2 changes: 1 addition & 1 deletion ui/ui.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func Index(w http.ResponseWriter, r *http.Request) {
}

q := dnsqueue.StartQueue(QUEUE_LENGTH, WORKERS)
hostnames := history.ExternalHostnames(records, 64)
hostnames := history.Random(16, history.Uniq(history.ExternalHostnames(records)))

for _, record := range hostnames {
q.Add("8.8.8.8:53", "A", record+".")
Expand Down

0 comments on commit 738a504

Please sign in to comment.