-
Notifications
You must be signed in to change notification settings - Fork 0
/
minhash.go
61 lines (53 loc) · 1.17 KB
/
minhash.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
package minhash
import (
"math"
"math/rand"
)
const (
// Prime is the smallest prime larger than the largest
// possible hash value (max hash = 32 bit int)
Prime uint64 = 4294967311
MaxHash float64 = 1<<32 - 1
)
type Minhash struct {
permutationsNumber int
permArgs map[int][]uint32
rand *rand.Rand
}
func New(permNumber int) Minhash {
m := Minhash{permutationsNumber: permNumber, rand: rand.New(rand.NewSource(math.MaxInt64))}
m.init()
return m
}
func NewWithSeed(permNumber int, seed int64) Minhash {
m := Minhash{permutationsNumber: permNumber, rand: rand.New(rand.NewSource(seed))}
m.init()
return m
}
func (m *Minhash) init() {
used := make(map[uint32]bool)
m.permArgs = make(map[int][]uint32)
for i := 0; i < 2; i++ {
numbers := []uint32{}
for j := 0; j < m.permutationsNumber; j++ {
ok := true
for ok {
n := m.randInt()
_, ok = used[n]
if !ok {
used[n] = true
numbers = append(numbers, n)
}
}
}
m.permArgs[i] = numbers
}
}
func (m *Minhash) NewSet(els []string) Set {
s := Set{Elements: els, minhash: m}
s.init()
return s
}
func (m *Minhash) randInt() uint32 {
return m.rand.Uint32()
}