-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcollector.go
115 lines (100 loc) · 1.86 KB
/
collector.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
package main
import (
"net/url"
"sync"
"go.uber.org/zap"
)
// zapURL renders u with its String method and wraps the result in a zap
// string field stored under the "url" key.
func zapURL(u *url.URL) zap.Field {
	text := u.String()
	return zap.String("url", text)
}
// Task is one stage of work driven by Collector.
//
// Collector.worker receives from URLs until that channel is closed, calling
// Run for every received URL; Collector.runWorkers calls Finish once all of
// the workers it started for the task have returned.
type Task interface {
	// Name returns the label attached to the task's log entries.
	Name() string
	// URLs returns the channel the task's input URLs are received from.
	URLs() <-chan *url.URL
	// Run processes a single URL. A non-nil error is logged by the worker
	// and does not stop its loop.
	Run(*url.URL) error
	// Finish is called by runWorkers after every worker of the task is done.
	Finish()
}
// SourceMap binds the "sources" and "sourcesContent" keys of a JSON document
// to string slices.
type SourceMap struct {
	// FileNames is encoded and decoded under the "sources" key.
	FileNames []string `json:"sources"`
	// Contents is encoded and decoded under the "sourcesContent" key.
	// NOTE(review): presumably Contents[i] is the text of FileNames[i] —
	// confirm against the code that consumes this struct.
	Contents []string `json:"sourcesContent"`
}
// Collector holds the settings and the input channel of the pipeline that
// Run executes.
type Collector struct {
	// Logger receives the debug and error entries written by worker.
	Logger *zap.Logger
	// Output is handed to the TaskMaps and TaskInfos stages in Run.
	Output string
	// Workers is how many worker goroutines runWorkers is asked to start
	// for each task.
	Workers int
	// Debug is not read by any of the Collector methods declared alongside
	// this struct.
	Debug bool

	// pages is created by Init, fed by Add, closed by Close and used as the
	// input of the first stage in Run.
	pages chan *url.URL
}
// Init creates the unbuffered channel that Add sends to, Close closes and
// Run reads from. Each call installs a fresh channel.
func (c *Collector) Init() {
	pages := make(chan *url.URL)
	c.pages = pages
}
// Add parses purl and sends the resulting URL on the pages channel.
//
// The parse error, if any, is returned unchanged and nothing is sent. The
// send itself blocks until the URL is accepted by the channel.
func (c *Collector) Add(purl string) error {
	page, err := url.Parse(purl)
	if err == nil {
		c.pages <- page
	}
	return err
}
// Close closes the pages channel, which ends any range loop receiving from
// it once the already-sent URLs have been consumed.
func (c *Collector) Close() {
	pages := c.pages
	close(pages)
}
// Run wires the four stages together and blocks until all of them are done:
//
//	pages -> TaskPages -> TaskScripts -> TaskMaps -> TaskInfos
//
// The first three stages each run under runWorkers in a goroutine of their
// own; the TaskInfos stage is driven by a single worker loop on the calling
// goroutine, after which Run waits for the spawned stages to return.
//
// NOTE(review): Finish is only called from runWorkers, so it is never called
// for the TaskInfos stage here — confirm that this is intended.
func (c *Collector) Run() {
	var wg sync.WaitGroup

	// Unbuffered hand-off channels between consecutive stages.
	scriptURLs := make(chan *url.URL)
	mapURLs := make(chan *url.URL)
	infoURLs := make(chan *url.URL)

	pagesStage := &TaskPages{In: c.pages, Out: scriptURLs}
	scriptsStage := &TaskScripts{
		Logger:  c.Logger,
		In:      scriptURLs,
		Out:     mapURLs,
		Visited: make(map[string]struct{}),
		Mutex:   &sync.Mutex{},
	}
	mapsStage := &TaskMaps{Output: c.Output, In: mapURLs, Out: infoURLs}
	infosStage := &TaskInfos{Output: c.Output, In: infoURLs}

	for _, stage := range []Task{pagesStage, scriptsStage, mapsStage} {
		c.spawn(&wg, c.runWorkers, stage)
	}

	// The last stage runs here rather than in a spawned goroutine.
	c.worker(infosStage)
	wg.Wait()
}
// spawn runs fn(task) in a new goroutine tracked by wg: the counter is
// incremented before the goroutine starts and decremented when fn returns.
func (Collector) spawn(wg *sync.WaitGroup, fn func(Task), task Task) {
	wg.Add(1)
	go func(job Task) {
		defer wg.Done()
		fn(job)
	}(task)
}
// runWorkers starts the worker goroutines for task, waits until all of them
// have returned, and then calls task.Finish exactly once.
//
// c.Workers goroutines are started, but never fewer than one: with a
// non-positive Workers (including the zero value) nothing would receive from
// task.URLs(), Finish would run immediately, and anything sending on that
// unbuffered input channel would block forever.
func (c *Collector) runWorkers(task Task) {
	count := c.Workers
	if count < 1 {
		count = 1
	}

	var wg sync.WaitGroup
	for i := 0; i < count; i++ {
		c.spawn(&wg, c.worker, task)
	}
	wg.Wait()

	// Every worker has left its loop, so the task can be finalized.
	task.Finish()
}
// worker receives URLs from task.URLs() until that channel is closed. Each
// URL is announced at debug level and passed to task.Run; a failing Run is
// logged at error level and the loop moves on to the next URL.
func (c *Collector) worker(task Task) {
	for target := range task.URLs() {
		c.Logger.Debug("running task", zap.String("task", task.Name()), zapURL(target))

		if err := task.Run(target); err != nil {
			c.Logger.Error("task error", zap.Error(err), zap.String("task", task.Name()), zapURL(target))
		}
	}
}