-
Notifications
You must be signed in to change notification settings - Fork 0
/
bst.go
659 lines (546 loc) · 15.9 KB
/
bst.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
package main
import (
"bufio"
"errors"
"flag"
"fmt"
"log"
"os"
"runtime"
"strconv"
"strings"
"sync"
"text/scanner"
"time"
)
/*****************************************************************************************/
// Jobs is a single hashing work item handed to hash workers over a channel.
type Jobs struct {
	tree    *Node  // root of the tree to hash
	tree_id int    // index of the tree in the input order
	name    string // debug label ("JobID::<n>")
}
// InputArgs bundles the parsed command-line flag values.
type InputArgs struct {
	hash_workers *int    // number of hashing goroutines (-hash-workers)
	data_workers *int    // number of map-update goroutines (-data-workers)
	comp_workers *int    // number of comparison goroutines (-comp-workers)
	input_file   *string // path to the tree description file (-input)
	run_mode     int     // NOTE(review): never set or read in this file — confirm it can be removed
}
// WorkerArgs holds the execution-mode switches that load_args derives
// from the command-line arguments.
type WorkerArgs struct {
	sequential           bool // hash-workers == 1: run the sequential pipeline
	eq_hash_data_workers bool // hash-workers == data-workers (and not sequential)
	hash_only            bool // data-workers == 0: stop after hashing
	map_only             bool // comp-workers == 0: stop after grouping
	special_case         bool // data-workers > 1 and != hash-workers
}
// Node is a binary-search-tree node; Insert sends values greater than
// value to the right and everything else to the left.
type Node struct {
	value int
	left  *Node
	right *Node
}
// HashTreePair carries one hashing result (hash value plus tree index)
// from a hash worker to a map updater.
type HashTreePair struct {
	hash    int
	tree_id int
}
// args_parser registers the program's command-line flags, parses them,
// and returns the values bundled in an InputArgs.
func args_parser() InputArgs {
	inputFile := flag.String("input", "", "a string")
	hashWorkers := flag.Int("hash-workers", 1, "an int")
	dataWorkers := flag.Int("data-workers", 0, "an int")
	compWorkers := flag.Int("comp-workers", 0, "an int")
	flag.Parse()
	return InputArgs{
		hash_workers: hashWorkers,
		data_workers: dataWorkers,
		comp_workers: compWorkers,
		input_file:   inputFile,
	}
}
// load_args derives the execution-mode switches used by run_all from the
// parsed command-line arguments.
func load_args(args InputArgs) WorkerArgs {
	sequential := *args.hash_workers == 1
	hashOnly := *args.data_workers == 0
	mapOnly := *args.comp_workers == 0
	// Equal number of hash and data workers (parallel mode only).
	eqWorkers := !sequential && *args.hash_workers == *args.data_workers
	// Case: hash-workers > data-workers > 1.
	special := !sequential && *args.data_workers > 1 && !eqWorkers
	return WorkerArgs{
		sequential:           sequential,
		hash_only:            hashOnly,
		map_only:             mapOnly,
		eq_hash_data_workers: eqWorkers,
		special_case:         special,
	}
}
// computeHash folds the tree's traversal sequence into a small integer
// hash in the range [0, 1000).
func (tree *Node) computeHash() int {
	hash := 1
	for _, v := range tree.in_order_traversal(nil) {
		shifted := v + 2
		hash = (hash*shifted + shifted) % 1000
	}
	return hash
}
// In-order tree traversal Method for hash computation: appends the
// subtree's values to result and returns the extended slice.
// NOTE(review): the right subtree is visited before the left, so values
// come out in descending order — the reverse of a textbook in-order walk.
// Hashing and equality only need a deterministic order, so all callers
// here are consistent, but confirm the ordering is intentional.
func (tree *Node) in_order_traversal(result []int) []int {
	if tree != nil {
		result = tree.right.in_order_traversal(result)
		result = append(result, tree.value)
		result = tree.left.in_order_traversal(result)
	}
	return result
}
// Insert places value into the subtree rooted at n, preserving the BST
// invariant: strictly greater values go right, everything else goes left.
func (n *Node) Insert(value int) {
	if value <= n.value {
		if n.left != nil {
			n.left.Insert(value)
			return
		}
		n.left = &Node{value: value}
		return
	}
	if n.right != nil {
		n.right.Insert(value)
		return
	}
	n.right = &Node{value: value}
}
// buildTrees reads the input file — one whitespace-separated list of
// integers per line — and appends one BST per line to all_trees. The
// first token of a line becomes the root; the rest are inserted in order.
// Exits via log.Fatal on any file error.
func buildTrees(file *string, all_trees *[]*Node) {
	f, err := os.Open(*file)
	if err != nil {
		log.Fatal(err)
	}
	// defer guarantees the handle is released on every exit path.
	defer f.Close()
	f_scan := bufio.NewScanner(f)
	for f_scan.Scan() {
		var s scanner.Scanner
		s.Init(strings.NewReader(f_scan.Text()))
		var new_line bool = true
		var tree *Node
		for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() {
			// NOTE(review): Atoi errors are deliberately ignored; a
			// non-numeric token becomes 0 — confirm input is all ints.
			tokenized, _ := strconv.Atoi(s.TokenText())
			if new_line {
				new_line = false
				tree = &Node{value: tokenized}
			} else {
				tree.Insert(tokenized)
			}
		}
		*all_trees = append(*all_trees, tree)
	}
	// A read error silently truncates the input unless checked here.
	if err := f_scan.Err(); err != nil {
		log.Fatal(err)
	}
}
// sameTrees reports whether trees a and b contain the same values in the
// same traversal order, i.e. are structurally equal as sorted sequences.
func sameTrees(a *Node, b *Node) bool {
	var a_flat, b_flat []int
	a_flat, b_flat = a.in_order_traversal(a_flat), b.in_order_traversal(b_flat)
	// Trees with different node counts can still collide on the hash.
	// Without this guard the loop below would index past b_flat (panic)
	// when a has more nodes, or falsely report equality when it has fewer.
	if len(a_flat) != len(b_flat) {
		return false
	}
	for i := 0; i < len(a_flat); i++ {
		if a_flat[i] != b_flat[i] {
			return false
		}
	}
	return true
}
// parallel_hash is the body of one hash-worker goroutine. It drains jobs
// from job_ch and, depending on parallel_update, either updates the shared
// hash map directly under the mutex or forwards (hash, tree_id) pairs to a
// central aggregator channel. Each completed job decrements wg.
func parallel_hash(job_ch <-chan Jobs, hash_map *map[int][]int,
	wg *sync.WaitGroup, mutex *sync.Mutex, queue chan<- HashTreePair, parallel_update bool) {
	if parallel_update {
		// Workers update the map themselves, serialized by the mutex.
		for job := range job_ch {
			h := job.tree.computeHash()
			mutex.Lock()
			(*hash_map)[h] = append((*hash_map)[h], job.tree_id)
			mutex.Unlock()
			wg.Done()
		}
		return
	}
	// One central manager channel aggregates all hash pairs for map update.
	for job := range job_ch {
		queue <- HashTreePair{hash: job.tree.computeHash(), tree_id: job.tree_id}
		wg.Done()
	}
}
// mutex_map is the body of one data-worker goroutine: it drains
// (hash, tree_id) pairs from queue and folds each into the shared hash
// map under the mutex, decrementing wg per pair.
func mutex_map(queue <-chan HashTreePair, hash_map *map[int][]int,
	wg *sync.WaitGroup, mutex *sync.Mutex) {
	for pair := range queue {
		mutex.Lock()
		bucket := append((*hash_map)[pair.hash], pair.tree_id)
		(*hash_map)[pair.hash] = bucket
		mutex.Unlock()
		wg.Done()
	}
}
// Driver function used in main(): builds the BSTs from the input file,
// then hashes, groups, and compares them, picking the sequential or
// parallel strategy from worker_args. Per-phase timings are printed to
// stdout; results accumulate in hash_map (hash -> tree ids) and
// same_trees (group id -> tree ids).
func run_all(all_trees *[]*Node, hash_map *map[int][]int, same_trees *map[int][]int, args InputArgs, worker_args WorkerArgs) {
	var wg sync.WaitGroup
	var wg_data sync.WaitGroup // for hash-workers > data-workers > 1
	var mutex sync.Mutex
	var mutex_data sync.Mutex // for hash-workers > data-workers > 1
	buildTrees(args.input_file, all_trees)
	// Buffers sized to the tree count so senders never block on capacity.
	job_ch := make(chan Jobs, len(*all_trees))         // one hash channel slot for each tree
	map_ch := make(chan HashTreePair, len(*all_trees)) // one map update channel slot for each tree
	visited_trees := make(map[int]VisitRecord)
	//fmt.Println("all_trees", &all_trees)
	/*** Part I: Sequential Hash Tree Computation, Grouping and Comparison ***/
	if worker_args.sequential {
		// Sequential Hashing-only (1/0/0)
		if worker_args.hash_only && worker_args.map_only {
			start := time.Now()
			for _, tree := range *all_trees {
				tree.computeHash()
			}
			hashTime := time.Since(start)
			fmt.Printf("hashTime (1/0/0 sequential) %s\n", hashTime)
			return
		}
		// Sequential Hashing + Grouping (1/1/0)
		if !worker_args.hash_only && worker_args.map_only {
			group_start := time.Now()
			for tree_id, tree := range *all_trees {
				var hash int = tree.computeHash()
				(*hash_map)[hash] = append((*hash_map)[hash], tree_id)
			}
			hashGroupTime := time.Since(group_start)
			fmt.Printf("hashGroupTime (1/1/0 sequential) %s\n", hashGroupTime)
		}
		// Sequential Hashing + Grouping + Comparison (1/1/1)
		if !worker_args.hash_only && !worker_args.map_only {
			group_start := time.Now()
			for tree_id, tree := range *all_trees {
				var hash int = tree.computeHash()
				(*hash_map)[hash] = append((*hash_map)[hash], tree_id)
			}
			hashGroupTime := time.Since(group_start)
			fmt.Printf("hashGroupTime (1/1/_ sequential) %s\n", hashGroupTime)
			comp_start := time.Now()
			compare_trees(all_trees, hash_map, same_trees)
			compareTreeTime := time.Since(comp_start)
			fmt.Printf("compareTreeTime (1/1/1 sequential) %s\n", compareTreeTime)
			return
		}
	}
	/*** Part II: Parallel Hash Tree Computation and Grouping ***/
	if !worker_args.sequential {
		// Parallel hashing
		wg.Add(len(*all_trees)) // one WaitGroup count per tree for the parallel_hash goroutines
		fmt.Println("Parallel Map Update", worker_args.eq_hash_data_workers, "on", len(*all_trees), "trees")
		p_hash_start := time.Now()
		// Send trees into job_ch channel for parallel_hash goroutines.
		// The channel is buffered to len(*all_trees) so this never blocks.
		for i, tree := range *all_trees {
			job_ch <- Jobs{tree: tree, name: fmt.Sprintf("JobID::%d", i), tree_id: i}
		}
		// Launch goroutines per specified number of hash workers
		for id := 0; id < *args.hash_workers; id++ {
			go parallel_hash(job_ch, hash_map,
				&wg, &mutex, map_ch, worker_args.eq_hash_data_workers)
		}
		close(job_ch) // Close channel for hashing jobs; workers still drain the buffer
		wg.Wait()
		//close(map_ch) // Close channel for hash map update
		hashTime := time.Since(p_hash_start)
		fmt.Printf("hashTime (i/0/0 parallel) %s\n", hashTime)
		close(map_ch) // Close channel for hash map update so the drain loops below terminate
		// Exit if ONLY hash-workers is specified
		if worker_args.hash_only {
			return
		}
		// KEEP???
		//"Central Manager" channel updates hashmap
		//for pair := range map_ch {
		//	(*hash_map)[pair.hash] = append((*hash_map)[pair.hash], pair.tree_id)
		//}
		// Case: data-workers = 1 or same as hash-workers. When
		// eq_hash_data_workers the workers already updated the map under
		// the mutex, so map_ch is empty and this loop drains nothing.
		if !worker_args.hash_only && (*args.data_workers == 1 || worker_args.eq_hash_data_workers) {
			for pair := range map_ch {
				(*hash_map)[pair.hash] = append((*hash_map)[pair.hash], pair.tree_id)
			}
			hashGroupTime := time.Since(p_hash_start)
			fmt.Printf("hashGroupTime (i/1/0 i/i/0) %s\n", hashGroupTime)
		}
		//Extra Credit: hash-workers > data-workers > 1
		if *args.data_workers != 1 && !worker_args.eq_hash_data_workers {
			//if !worker_args.eq_hash_data_workers && *args.data_workers > 1 {
			wg_data.Add(len(*all_trees))
			start := time.Now()
			for id := 0; id < *args.data_workers; id++ {
				go mutex_map(map_ch, hash_map,
					&wg_data, &mutex_data)
			}
			wg_data.Wait()
			elapsed := time.Since(start) + hashTime // include hashing time
			fmt.Printf("hashGroupTime (i/j/0 parallel) %s\n", elapsed)
		}
		// Print Hash Groups of size > 1
		if *args.data_workers > 0 {
			for hash, tree_id := range *hash_map {
				if len(tree_id) > 1 {
					fmt.Printf("%d:", hash)
					for _, id := range tree_id {
						fmt.Printf(" %d", id)
					}
					fmt.Println()
				}
			}
		}
	}
	// Terminate upon tree grouping if no comp-worker is specified.
	if worker_args.map_only {
		return
	}
	/*** Part III: Parallel Tree Comparison ***/
	if *args.comp_workers == 1 {
		comp_start := time.Now()
		compare_trees(all_trees, hash_map, same_trees)
		compareTreeTime := time.Since(comp_start)
		fmt.Printf("compareTreeTime (n/n/1 sequential) %s\n", compareTreeTime)
	} else {
		comp_start := time.Now()
		compare_trees_parallel(all_trees, hash_map, same_trees, *args.comp_workers, &visited_trees)
		compareTreeTime := time.Since(comp_start)
		fmt.Printf("compareTreeTime (n/n/n parallel) %s\n", compareTreeTime)
	}
	// Print Tree Groups
	for group_id, tree_id := range *same_trees {
		if len(tree_id) > 1 {
			fmt.Printf("group %d:", group_id)
			for _, id := range tree_id {
				fmt.Printf(" %d", id)
			}
			fmt.Println()
		}
	}
}
// main parses the command-line flags, reports the configuration, and
// hands control to run_all.
func main() {
	var all_trees []*Node
	hash_map := make(map[int][]int)
	same_trees := make(map[int][]int)
	args := args_parser()
	fmt.Println("hash workers=", *args.hash_workers, "data workers=", *args.data_workers, "comp workers=",
		*args.comp_workers, "input file=", *args.input_file)
	fmt.Println("Base num of cores for thread assignment available:", runtime.GOMAXPROCS(-1))
	run_all(&all_trees, &hash_map, &same_trees, args, load_args(args))
}
// CompJobs is a mutex-guarded, bounded FIFO of tree pairs awaiting
// comparison by the worker goroutines.
type CompJobs struct {
	space int         // capacity limit enforced by Insert
	pairs []TreePair  // pending pairs, oldest first
	mutex *sync.Mutex // guards pairs
}
// Insert appends pair to the job queue if the buffer has room, reporting
// whether the pair was accepted.
func (job *CompJobs) Insert(pair TreePair) bool {
	job.mutex.Lock()
	defer job.mutex.Unlock()
	if len(job.pairs) >= job.space {
		return false // buffer full; caller must retry later
	}
	job.pairs = append(job.pairs, pair)
	return true
}
// Remove pops the oldest tree pair from the job queue, returning an
// error when the queue is empty.
func (job *CompJobs) Remove() (TreePair, error) {
	job.mutex.Lock()
	defer job.mutex.Unlock()
	// Early return removes the confusing shadowed `pair` of the original.
	if len(job.pairs) == 0 {
		return TreePair{}, errors.New("Empty")
	}
	pair := job.pairs[0]
	job.pairs = job.pairs[1:]
	return pair, nil
}
// MakeCompJobs creates an empty comparison-job queue with the desired
// capacity, guarded by the supplied mutex.
func MakeCompJobs(capacity int, mutex *sync.Mutex) *CompJobs {
	jobs := CompJobs{space: capacity, mutex: mutex}
	jobs.pairs = make([]TreePair, 0, capacity)
	return &jobs
}
// TreePair names two candidate-identical trees drawn from one hash bucket.
type TreePair struct {
	group_id int // the hash value the pair was drawn from
	id_a     int // index of the first tree
	id_b     int // index of the second tree
}
// VisitRecord marks a tree as assigned to an identity group.
type VisitRecord struct {
	visited  bool // always true when stored; map membership is the real flag
	group_id int  // group label (smallest tree id seen for the group)
}
// register_same_trees records in visited_trees that trees a_id and b_id
// are identical, assigning both to a shared group id under mutex.
// NOTE(review): when both trees were already recorded, the winning group
// id is chosen by comparing tree ids rather than merging the two groups,
// and other members of the losing group are not relabelled — confirm this
// matches the intended grouping semantics.
func register_same_trees(a_id int, b_id int,
	mutex *sync.Mutex, visited_trees *map[int]VisitRecord) {
	var id int
	var a_checked, b_checked VisitRecord
	var a_ok, b_ok bool
	mutex.Lock()
	defer mutex.Unlock()
	a_checked, a_ok = (*visited_trees)[a_id]
	b_checked, b_ok = (*visited_trees)[b_id]
	// Neither tree seen before: open a new group keyed by the smaller id.
	if !a_ok && !b_ok {
		if a_id < b_id {
			id = a_id
		} else {
			id = b_id
		}
		visited := VisitRecord{visited: true, group_id: id}
		(*visited_trees)[a_id] = visited
		(*visited_trees)[b_id] = visited
		return
	}
	// Both seen before: keep the group of the smaller tree id.
	if a_ok && b_ok {
		if a_id < b_id {
			id = a_checked.group_id
		} else {
			id = b_checked.group_id
		}
		visited := VisitRecord{visited: true, group_id: id}
		(*visited_trees)[b_id] = visited
		(*visited_trees)[a_id] = visited
		return
	}
	// Exactly one seen before: the newcomer joins the existing group.
	if a_ok && !b_ok {
		(*visited_trees)[b_id] = a_checked
		return
	}
	if b_ok && !a_ok {
		(*visited_trees)[a_id] = b_checked
		return
	}
	return
}
// comp_worker_run is the body of one comparison-worker goroutine. It
// blocks on continue_working for a "work available" signal, pops a pair
// from the shared job buffer (retrying while the buffer is momentarily
// empty), compares the two trees, and records matches in visited_trees.
// The goroutine exits when continue_working is closed.
// NOTE(review): each retry after an empty-buffer Remove consumes another
// continue_working signal — confirm the signal/work accounting cannot
// stall other workers.
func comp_worker_run(job *CompJobs, all_trees *[]*Node,
	mutex_visited *sync.Mutex, visited_trees *map[int]VisitRecord,
	continue_working <-chan int, wg *sync.WaitGroup) {
	for {
		var pair TreePair
		var err error
		// Prevent work removal from an empty buffer
		for {
			// A closed channel (ok == false) means no more work: terminate.
			_, ok := <-continue_working
			if !ok {
				return
			}
			pair, err = job.Remove()
			if err == nil {
				break
			}
			// Buffer momentarily empty; back off before retrying.
			time.Sleep(1 * time.Millisecond)
		}
		var curr_node, next_node *Node = (*all_trees)[pair.id_a], (*all_trees)[pair.id_b]
		if sameTrees(curr_node, next_node) {
			register_same_trees(pair.id_a, pair.id_b,
				mutex_visited, visited_trees)
		}
		wg.Done()
	}
}
// compare_trees_parallel distributes all pairwise comparisons within each
// multi-member hash bucket across comp_workers goroutines, fed through a
// bounded CompJobs buffer, then renumbers the resulting groups densely
// into same_trees (group id -> tree ids).
func compare_trees_parallel(all_trees *[]*Node, hash_map *map[int][]int,
	same_trees *map[int][]int, comp_workers int,
	visited_trees *map[int]VisitRecord) {
	var mutex_comp sync.Mutex
	var mutex_visited sync.Mutex
	var wg sync.WaitGroup
	comp_jobs := MakeCompJobs(comp_workers, &mutex_comp) //pointer!!
	// REPLACE this with signal-only struct {} ???
	continue_working := make(chan int)
	// Launch tree comparison goroutines
	for i := 0; i < comp_workers; i++ {
		go comp_worker_run(comp_jobs, all_trees, &mutex_visited, visited_trees, continue_working, &wg)
	}
	// Feed tree pairs into pipeline channel; group_id here is the hash.
	for group_id, tree_ids := range *hash_map { //flatten out elements in hashmap
		if len(tree_ids) > 1 {
			for i := 0; i < len(tree_ids); i++ {
				for j := i + 1; j < (len(tree_ids)); j++ {
					pair := TreePair{group_id: group_id, id_a: tree_ids[i], id_b: tree_ids[j]}
					wg.Add(1)
					// Prevent main thread from inserting work into a full buffer
					for !comp_jobs.Insert(pair) {
						//sleep the main thread until queue is freed
						time.Sleep(1 * time.Millisecond)
					}
					// Unbuffered send: wakes exactly one worker per queued pair.
					continue_working <- 1
				}
			}
		}
	}
	wg.Wait()               // Let goroutines synchronize
	close(continue_working) // tells idle workers to exit their receive loop
	// Renumber group ids densely (0, 1, 2, ...) and emit membership lists.
	final_map := make(map[int]int)
	var count int = 0
	var exists bool
	var id int
	for key, value := range *visited_trees {
		id, exists = final_map[value.group_id]
		if !exists {
			final_map[value.group_id] = count
			id = count
			count++
		}
		(*same_trees)[id] = append((*same_trees)[id], key)
	}
}
// compare_trees fully compares the trees inside every multi-member hash
// bucket and records groups of identical trees in same_trees, keyed by a
// running group id.
func compare_trees(all_trees *[]*Node, hash_map *map[int][]int,
	same_trees *map[int][]int) {
	group := -1
	for _, ids := range *hash_map {
		if len(ids) <= 1 {
			continue // singleton buckets need no comparison (and are not printed)
		}
		grouped := make(map[int]bool) // bucket-local: positions already assigned
		for i := 0; i < len(ids); i++ {
			if grouped[i] {
				continue
			}
			// ids[i] has no group yet: it founds a new one.
			group++
			(*same_trees)[group] = append((*same_trees)[group], ids[i])
			grouped[i] = true
			base := (*all_trees)[ids[i]]
			for j := i + 1; j < len(ids); j++ {
				if grouped[j] {
					continue
				}
				// Compare each later, still-ungrouped tree against the founder.
				if sameTrees(base, (*all_trees)[ids[j]]) {
					(*same_trees)[group] = append((*same_trees)[group], ids[j])
					grouped[j] = true
				}
			}
		}
	}
}