package main

import (
	"fmt"
	"log"
	"net/http"
	"os"
	"sort"
	"strconv"
	"strings"

	"github.com/yhat/scrape"
	"golang.org/x/net/html"
)

type CharFreq struct {
	char    string
	freqCnt int
}
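
// String implements fmt.Stringer, so a CharFreq prints as a readable
// "char: count" pair (handy with fmt.Println on a slice of them).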
func (c CharFreq) String() string {
	return fmt.Sprintf("%s: %d", c.char, c.freqCnt)
}

// Package-level vars so both handler() and main() can get at the results.
// Shared mutable state like this is the kind of practice that bites later:
// each web request would keep appending, so scrapeAndParse and getFreqCnts
// reset their slice before filling it.
var codeElems []string
var charFreqs []CharFreq

// ByFreqCntDesc implements sort.Interface to order CharFreqs by frequency count, descending.
type ByFreqCntDesc []CharFreq

func (a ByFreqCntDesc) Len() int           { return len(a) }
func (a ByFreqCntDesc) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a ByFreqCntDesc) Less(i, j int) bool { return a[i].freqCnt > a[j].freqCnt }
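
// Note: since Go 1.8 the same ordering could be written inline with sort.Slice,
// e.g.:
//
//	sort.Slice(charFreqs, func(i, j int) bool { return charFreqs[i].freqCnt > charFreqs[j].freqCnt })
//
// without defining a named type; the sort.Interface version above keeps the intent named.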

func scrapeAndParse() {
	// Reset so repeated calls (e.g. one per web request) don't keep appending.
	codeElems = nil
	// I could hard-code the strings and avoid fetching this page every time...maybe later.
	resp, err := http.Get("http://www.fogcreek.com/jobs/supportengineerlevel2")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	root, err := html.Parse(resp.Body)
	if err != nil {
		panic(err)
	}
	var f func(*html.Node)
	f = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "code" {
			// I tried hard to avoid taking on `scrape` as a dependency, but reading
			// the text contents out of an html.Node was NOT straightforward for me.
			// The `scrape` package made it easy, and the code within the package is
			// only about 150 lines; at that length it could be understood with a
			// little study, or even pasted into this project if licensing allows.
			// (A rough dependency-free sketch follows this function.)
			codeElems = append(codeElems, scrape.Text(n))
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			f(c)
			// Assume we only need the first 2 <code> tags on the job posting page;
			// once both are found, stop visiting any further siblings at this level.
			if len(codeElems) >= 2 {
				break
			}
		}
	}
	f(root)
}
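
// A rough sketch of what dropping the `scrape` dependency could look like:
// walk the subtree and concatenate every text node. This hypothetical helper
// is not wired in anywhere, and scrape.Text also normalizes whitespace, which
// this version makes no attempt to do.
func nodeText(n *html.Node) string {
	if n.Type == html.TextNode {
		return n.Data
	}
	var sb strings.Builder
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		sb.WriteString(nodeText(c))
	}
	return sb.String()
}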

func getFreqCnts(codeElems []string) string {
	// Make some dicey assumptions about the scraped elements here:
	// - 1st <code> tag holds the characters to get counts for
	// - 2nd <code> tag holds the corpus to run the freq count on
	// I first tried a 2-dimensional array to hold both the chars and their counts,
	// but kept getting unexpected results when filling the 2nd dimension, so I
	// ditched it; defining a struct to hold a letter and its count worked better.
	charsToCnt := strings.Split(codeElems[0], "")
	corpus := codeElems[1]
	// Reset so repeated calls don't keep appending.
	charFreqs = nil
	var retStr string
	// Loop over the chars and count each one's occurrences in the corpus.
	// I was hoping to start a goroutine per iteration to gain some concurrency
	// (and because I've never played with goroutines before), but I quickly got
	// lost in the semantics of channels, semaphores, etc. A channel-based sketch
	// follows this function.
	for i, char := range charsToCnt {
		charFreqs = append(charFreqs, CharFreq{char, strings.Count(corpus, char)})
		retStr = retStr + char + ", " + strconv.Itoa(charFreqs[i].freqCnt) + "; "
	}
	return retStr
}
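
// A minimal sketch of the concurrent version mused about in getFreqCnts, using
// one goroutine per character and a channel to collect results. This hypothetical
// helper is not called anywhere; for inputs this small the sequential loop above
// is almost certainly faster, so it is here purely as a reference.
func getFreqCntsConcurrent(charsToCnt []string, corpus string) []CharFreq {
	type indexed struct {
		i  int
		cf CharFreq
	}
	ch := make(chan indexed)
	for i, char := range charsToCnt {
		// Pass i and char as args so each goroutine gets its own copies.
		go func(i int, char string) {
			ch <- indexed{i, CharFreq{char, strings.Count(corpus, char)}}
		}(i, char)
	}
	// Receive exactly one result per goroutine; the index preserves input order.
	results := make([]CharFreq, len(charsToCnt))
	for range charsToCnt {
		r := <-ch
		results[r.i] = r.cf
	}
	return results
}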

func sortAndTrim(charFreqs []CharFreq) string {
	// Order by freq count, descending, then drop all chars after (and including)
	// the "_" to get the secret word; callers print it to the page or stdout.
	sort.Sort(ByFreqCntDesc(charFreqs))
	var secretWord string
	for _, charFreq := range charFreqs {
		if charFreq.char == "_" {
			break
		}
		secretWord = secretWord + charFreq.char
	}
	return secretWord
}

func handler(w http.ResponseWriter, r *http.Request) {
	log.Println("handler func received request:", r.URL.Path)
	scrapeAndParse()
	// Use Fprint rather than Fprintf: the scraped text is data, not a format
	// string, and any stray "%" in it would garble Fprintf's output.
	fmt.Fprint(w, codeElems[0]+"\n\n")
	fmt.Fprint(w, codeElems[1]+"\n\n")
	initCnts := getFreqCnts(codeElems)
	fmt.Fprint(w, initCnts+"\n\n")
	secretWord := sortAndTrim(charFreqs)
	fmt.Fprint(w, secretWord)
}

func main() {
	http.HandleFunc("/", handler)
	port := os.Getenv("PORT")
	if port == "" {
		// A blank port number is expected when running the non-dev-mode container
		// that just prints to stdout.
		scrapeAndParse()
		//fmt.Println(codeElems)
		getFreqCnts(codeElems)
		secretWord := sortAndTrim(charFreqs)
		fmt.Println(secretWord)
		os.Exit(0)
		/*
			TODO:
			- write some tests
			- figure out how to adapt the setup to run as a web handler for debugging and to print to stdout
			  - this might get ugly/hacky
			  - or not: might be able to call handler() directly if no port is set
			- rebuild docker image as jkaplon/fog-creek-supp-eng; push to hub
		*/
	}
	err := http.ListenAndServe(":"+port, nil)
	if err != nil {
		log.Fatal("Could not listen: ", err)
	}
}
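
// A possible take on the TODO above: handler() can be exercised without a real
// server via net/http/httptest, which is also a natural starting point for the
// "write some tests" item. A hypothetical sketch, not wired in:
//
//	req := httptest.NewRequest("GET", "/", nil)
//	rec := httptest.NewRecorder()
//	handler(rec, req)
//	fmt.Println(rec.Body.String())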