-
Notifications
You must be signed in to change notification settings - Fork 68
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
hashmap: Implement a new map implementation
Initial hashmap implementation that supports Get, Set, Delete. Implementation strongly influenced by: https://www.sebastiansylvan.com/post/robin-hood-hashing-should-be-your-default-hash-table-implementation/ Simple benchmarks show that it can be faster than Go's map for map[string]interface{} keys. TODO: benchmarks that include deletes/tombstones. It is missing a couple of nice properties of Go maps: 1) Go's map can be modified while being iterated. If additions are made to Hashmap while it is being iterated (Iter implementation still TODO), the iterator may encounter keys more than once or not at all. However, Hashmap iteration should behave fine if keys are deleted during iteration. 2) Go's map has a built-in race detector that runs all the time. Change-Id: I93ba5fb9b6c5b6a71e2e75f5dbc18239464756a7
- Loading branch information
Showing
3 changed files
with
954 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
// Copyright (c) 2020 Arista Networks, Inc. | ||
// Use of this source code is governed by the Apache License 2.0 | ||
// that can be found in the COPYING file. | ||
|
||
package hashmap | ||
|
||
import "math/bits" | ||
|
||
// Hashable is implemented by key types that cannot be hashed natively and
// therefore provide their own hashing and equality.
type Hashable interface {
	// Hash returns the hash value of the key.
	Hash() uint64
	// Equal reports whether the key is equal to other.
	Equal(other any) bool
}
|
||
// Hashmap implements a hashmap | ||
type Hashmap[K any, V any] struct { | ||
seed uint64 | ||
entries []entry[K, V] | ||
length int | ||
hash func(K) uint64 | ||
equal func(K, K) bool | ||
} | ||
|
||
func New[K any, V any](size uint, hash func(K) uint64, equal func(K, K) bool) *Hashmap[K, V] { | ||
var entries []entry[K, V] | ||
if size != 0 { | ||
entries = make([]entry[K, V], 1<<bits.Len(size-1)) | ||
} | ||
return &Hashmap[K, V]{entries: entries, hash: hash, equal: equal} | ||
} | ||
|
||
// entry is a single slot of a Hashmap's table.
type entry[K, V any] struct {
	// hash is the hash of key. It is kept on tombstones so that the slot's
	// probe distance can still be computed.
	hash uint64
	// key and value hold the stored pair; both are zeroed on tombstones.
	key   K
	value V
	// occupied is false only for slots that have never been used.
	occupied bool
	// tombstone marks an occupied slot whose pair has been deleted.
	tombstone bool
}
|
||
// Len returns the length of m. | ||
func (m *Hashmap[K, V]) Len() int { | ||
return m.length | ||
} | ||
|
||
func (m *Hashmap[K, V]) mask() int { | ||
return len(m.entries) - 1 | ||
} | ||
|
||
func (m *Hashmap[K, V]) position(hash uint64) int { | ||
return int((hash ^ m.seed)) & m.mask() | ||
} | ||
|
||
// Set associates k with v in m. | ||
func (m *Hashmap[K, V]) Set(k K, v V) { | ||
capacity := len(m.entries) | ||
if capacity == 0 { | ||
m.resize(4) | ||
} else if m.length >= int(float64(capacity)*0.9) { | ||
m.resize(capacity * 2) | ||
} | ||
m.set(m.hash(k), k, v) | ||
} | ||
|
||
func (m *Hashmap[K, V]) set(hash uint64, k K, v V) { | ||
position := m.position(hash) | ||
var distance int | ||
for { | ||
existing := &m.entries[position] | ||
if !existing.occupied { | ||
m.entries[position] = entry[K, V]{hash: hash, key: k, value: v, occupied: true} | ||
m.length++ | ||
return | ||
} else if existing.hash == hash && m.equal(existing.key, k) { | ||
existing.value = v | ||
return | ||
} | ||
|
||
existingDistance := position - m.position(existing.hash) | ||
if existingDistance < 0 { | ||
existingDistance += len(m.entries) | ||
} | ||
if distance > existingDistance { | ||
// k is further from its desired position than existing.k, | ||
// steal it's spot and find a new place for existing. | ||
if existing.tombstone { | ||
m.entries[position] = entry[K, V]{hash: hash, key: k, value: v, occupied: true} | ||
m.length++ | ||
return | ||
} | ||
hash, existing.hash = existing.hash, hash | ||
k, existing.key = existing.key, k | ||
v, existing.value = existing.value, v | ||
distance = existingDistance | ||
} else if distance == existingDistance && existing.tombstone { | ||
m.entries[position] = entry[K, V]{hash: hash, key: k, value: v, occupied: true} | ||
m.length++ | ||
return | ||
} | ||
|
||
distance++ | ||
position = (position + 1) & m.mask() | ||
} | ||
} | ||
|
||
// Get gets the value associated with k | ||
func (m *Hashmap[K, V]) Get(k K) (V, bool) { | ||
ent := m.getRef(k) | ||
if ent == nil { | ||
var v V | ||
return v, false | ||
} | ||
return ent.value, true | ||
} | ||
|
||
func (m *Hashmap[K, V]) getRef(k K) *entry[K, V] { | ||
hash := m.hash(k) | ||
position := m.position(hash) | ||
var distance int | ||
for { | ||
ent := &m.entries[position] | ||
if !ent.occupied { | ||
return nil | ||
} | ||
entDistance := position - m.position(ent.hash) | ||
if entDistance < 0 { | ||
entDistance += len(m.entries) | ||
} | ||
if distance > entDistance { | ||
// Our distance has exceeded this entry's distance, we | ||
// would have found our key by now if it was present. | ||
return nil | ||
} | ||
if ent.hash == hash && m.equal(ent.key, k) { | ||
return ent | ||
} | ||
distance++ | ||
position = (position + 1) & m.mask() | ||
} | ||
} | ||
|
||
// Delete removes k from m | ||
func (m *Hashmap[K, V]) Delete(k K) { | ||
ent := m.getRef(k) | ||
if ent == nil { | ||
return | ||
} | ||
// Set the entry to a tombstone. We keep the entry's hash set, so | ||
// that this entry's distance can still be calculated. | ||
var ( | ||
nilK K | ||
nilV V | ||
) | ||
ent.key = nilK | ||
ent.value = nilV | ||
ent.tombstone = true | ||
m.length-- | ||
} | ||
|
||
func (m *Hashmap[K, V]) resize(size int) { | ||
oldEntries := m.entries | ||
m.entries = make([]entry[K, V], size) | ||
m.length = 0 | ||
for _, ent := range oldEntries { | ||
if !ent.occupied || ent.tombstone { | ||
continue | ||
} | ||
m.set(ent.hash, ent.key, ent.value) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
// Copyright (c) 2019 Arista Networks, Inc. | ||
// Use of this source code is governed by the Apache License 2.0 | ||
// that can be found in the COPYING file. | ||
|
||
package hashmap | ||
|
||
import ( | ||
"fmt" | ||
"math/rand" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/aristanetworks/goarista/key" | ||
) | ||
|
||
// dumbHashable is a test key that wraps an arbitrary value. Its Hash
// method returns the same constant for every value, so all dumbHashable
// keys collide.
type dumbHashable struct {
	dumb any
}
|
||
func (d dumbHashable) Equal(other interface{}) bool { | ||
if o, ok := other.(dumbHashable); ok { | ||
return d.dumb == o.dumb | ||
} | ||
return false | ||
} | ||
|
||
func (d dumbHashable) Hash() uint64 { | ||
return 1234567890 | ||
} | ||
|
||
func TestMapSetGet(t *testing.T) { | ||
m := New[Hashable, any](0, | ||
func(h Hashable) uint64 { return h.Hash() }, | ||
func(x, y Hashable) bool { return x.Equal(y) }) | ||
tests := []struct { | ||
setkey interface{} | ||
getkey interface{} | ||
val interface{} | ||
found bool | ||
}{{ | ||
setkey: dumbHashable{dumb: "hashable1"}, | ||
getkey: dumbHashable{dumb: "hashable1"}, | ||
val: 1, | ||
found: true, | ||
}, { | ||
getkey: dumbHashable{dumb: "hashable2"}, | ||
val: nil, | ||
found: false, | ||
}, { | ||
setkey: dumbHashable{dumb: "hashable2"}, | ||
getkey: dumbHashable{dumb: "hashable2"}, | ||
val: 2, | ||
found: true, | ||
}, { | ||
getkey: dumbHashable{dumb: "hashable42"}, | ||
val: nil, | ||
found: false, | ||
}, { | ||
setkey: key.New(map[string]interface{}{"a": int32(1)}), | ||
getkey: key.New(map[string]interface{}{"a": int32(1)}), | ||
val: "foo", | ||
found: true, | ||
}, { | ||
getkey: key.New(map[string]interface{}{"a": int32(2)}), | ||
val: nil, | ||
found: false, | ||
}, { | ||
setkey: key.New(map[string]interface{}{"a": int32(2)}), | ||
getkey: key.New(map[string]interface{}{"a": int32(2)}), | ||
val: "bar", | ||
found: true, | ||
}} | ||
for _, tcase := range tests { | ||
if tcase.setkey != nil { | ||
m.Set(tcase.setkey.(Hashable), tcase.val) | ||
} | ||
val, found := m.Get(tcase.getkey.(Hashable)) | ||
if found != tcase.found { | ||
t.Errorf("found is %t, but expected found %t", found, tcase.found) | ||
} | ||
if val != tcase.val { | ||
t.Errorf("val is %v for key %v, but expected val %v", val, tcase.getkey, tcase.val) | ||
} | ||
} | ||
t.Log(m.debug()) | ||
} | ||
|
||
func BenchmarkMapGrow(b *testing.B) { | ||
keys := make([]key.Key, 150) | ||
for j := 0; j < len(keys); j++ { | ||
keys[j] = key.New(map[string]interface{}{ | ||
"foobar": 100, | ||
"baz": j, | ||
}) | ||
} | ||
b.Run("key.Map", func(b *testing.B) { | ||
b.ReportAllocs() | ||
for i := 0; i < b.N; i++ { | ||
m := key.NewMap() | ||
for j := 0; j < len(keys); j++ { | ||
m.Set(keys[j], "foobar") | ||
} | ||
if m.Len() != len(keys) { | ||
b.Fatal(m) | ||
} | ||
} | ||
}) | ||
b.Run("Hashmap", func(b *testing.B) { | ||
b.ReportAllocs() | ||
for i := 0; i < b.N; i++ { | ||
m := New[Hashable, any](0, | ||
func(h Hashable) uint64 { return h.Hash() }, | ||
func(x, y Hashable) bool { return x.Equal(y) }) | ||
for j := 0; j < len(keys); j++ { | ||
m.Set(keys[j].(Hashable), "foobar") | ||
} | ||
if m.Len() != len(keys) { | ||
b.Fatal(m) | ||
} | ||
} | ||
}) | ||
b.Run("Hashmap-presize", func(b *testing.B) { | ||
b.ReportAllocs() | ||
for i := 0; i < b.N; i++ { | ||
m := New[Hashable, any](150, | ||
func(h Hashable) uint64 { return h.Hash() }, | ||
func(x, y Hashable) bool { return x.Equal(y) }) | ||
for j := 0; j < len(keys); j++ { | ||
m.Set(keys[j].(Hashable), "foobar") | ||
} | ||
if m.Len() != len(keys) { | ||
b.Fatal(m) | ||
} | ||
} | ||
}) | ||
} | ||
|
||
func BenchmarkMapGet(b *testing.B) { | ||
keys := make([]key.Key, 150) | ||
for j := 0; j < len(keys); j++ { | ||
keys[j] = key.New(map[string]interface{}{ | ||
"foobar": 100, | ||
"baz": j, | ||
}) | ||
} | ||
keysRandomOrder := make([]key.Key, len(keys)) | ||
copy(keysRandomOrder, keys) | ||
rand.Shuffle(len(keysRandomOrder), func(i, j int) { | ||
keysRandomOrder[i], keysRandomOrder[j] = keysRandomOrder[j], keysRandomOrder[i] | ||
}) | ||
b.Run("key.Map", func(b *testing.B) { | ||
m := key.NewMap() | ||
for j := 0; j < len(keys); j++ { | ||
m.Set(keys[j], "foobar") | ||
} | ||
b.ReportAllocs() | ||
b.ResetTimer() | ||
for i := 0; i < b.N; i++ { | ||
for _, k := range keysRandomOrder { | ||
_, ok := m.Get(k) | ||
if !ok { | ||
b.Fatal("didn't find key") | ||
} | ||
} | ||
} | ||
}) | ||
b.Run("Hashmap", func(b *testing.B) { | ||
m := New[Hashable, any](0, | ||
func(h Hashable) uint64 { return h.Hash() }, | ||
func(x, y Hashable) bool { return x.Equal(y) }) | ||
for j := 0; j < len(keys); j++ { | ||
m.Set(keys[j].(Hashable), "foobar") | ||
} | ||
b.ReportAllocs() | ||
b.ResetTimer() | ||
for i := 0; i < b.N; i++ { | ||
for _, k := range keysRandomOrder { | ||
_, ok := m.Get(k.(Hashable)) | ||
if !ok { | ||
b.Fatal("didn't find key") | ||
} | ||
} | ||
} | ||
}) | ||
} | ||
|
||
func (m *Hashmap[K, V]) debug() string { | ||
var buf strings.Builder | ||
|
||
for i, ent := range m.entries { | ||
var ( | ||
k string | ||
distance int | ||
) | ||
if !ent.occupied { | ||
k = "<empty>" | ||
} else { | ||
if ent.tombstone { | ||
k = "<tombstone>" | ||
} else { | ||
k = fmt.Sprint(ent.key) | ||
} | ||
distance = i - m.position(ent.hash) | ||
if distance < 0 { | ||
distance += len(m.entries) | ||
} | ||
} | ||
fmt.Fprintf(&buf, "%d %d %s\n", i, distance, k) | ||
} | ||
|
||
return buf.String() | ||
} |
Oops, something went wrong.