Skip to content

Commit

Permalink
hashmap: Implement a new map implementation
Browse files Browse the repository at this point in the history
Initial hashmap implementation that supports Get, Set, Delete.

Implementation strongly influenced by:
https://www.sebastiansylvan.com/post/robin-hood-hashing-should-be-your-default-hash-table-implementation/

Simple benchmarks show that it can be faster than Go's built-in map
for map[string]interface{} keys. TODO: add benchmarks that include
deletes/tombstones.

It is missing a couple nice properties of Go maps:

1) Go's map can be modified while being iterated. If additions are
   made to Hashmap while it is being iterated (Iter implementation
   still TODO), the iterator may encounter keys more than once or not
   at all. However, Hashmap iteration should behave correctly if keys
   are deleted during iteration.

2) Go's map has a built in race detector that runs all the time.

Change-Id: I93ba5fb9b6c5b6a71e2e75f5dbc18239464756a7
  • Loading branch information
aaronbee committed Mar 6, 2023
1 parent 001a81e commit 4b6b742
Show file tree
Hide file tree
Showing 3 changed files with 954 additions and 0 deletions.
169 changes: 169 additions & 0 deletions hashmap/hashmap.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
// Copyright (c) 2020 Arista Networks, Inc.
// Use of this source code is governed by the Apache License 2.0
// that can be found in the COPYING file.

package hashmap

import "math/bits"

// Hashable is implemented by key types that cannot be hashed natively
// and therefore provide their own hashing and equality.
type Hashable interface {
	// Hash returns the hash of the receiver.
	Hash() uint64
	// Equal reports whether other is equal to the receiver.
	Equal(other any) bool
}

// Hashmap is a generic open-addressing hash table from keys of type K
// to values of type V. Hashing and key comparison are delegated to
// caller-supplied functions, so K does not have to be comparable.
type Hashmap[K, V any] struct {
	// hash and equal are the caller-supplied key hashing and key
	// comparison functions.
	hash  func(K) uint64
	equal func(K, K) bool

	// seed is XORed into every hash before it is reduced to a slot index.
	seed uint64
	// entries is the slot table.
	entries []entry[K, V]
	// length is the number of live keys; deleted keys are not counted.
	length int
}

// New returns an empty Hashmap that hashes keys with hash and compares
// them with equal.
//
// size is a hint for the number of keys the map is expected to hold.
// With a size of 0 no table is allocated until the first Set. Otherwise
// the table is allocated with a power-of-two number of slots that is
// large enough for size keys to be inserted without triggering a
// resize.
func New[K any, V any](size uint, hash func(K) uint64, equal func(K, K) bool) *Hashmap[K, V] {
	var entries []entry[K, V]
	if size != 0 {
		// Slot counts are powers of two so that a hash can be reduced
		// to an index with a bitmask.
		capacity := 1 << bits.Len(size-1)
		// Set grows the table once it holds 90% of its slots. Rounding
		// size up to a power of two alone can land above that
		// threshold (e.g. size 4 -> 4 slots), which would make the
		// hint pointless. One doubling is always enough.
		if int(float64(capacity)*0.9) < int(size) {
			capacity <<= 1
		}
		entries = make([]entry[K, V], capacity)
	}
	return &Hashmap[K, V]{entries: entries, hash: hash, equal: equal}
}

// entry is one slot of a Hashmap's table.
type entry[K, V any] struct {
	// hash is the hash the key was stored under. It is kept when the
	// entry is deleted so the slot's probe distance can still be
	// computed.
	hash  uint64
	key   K
	value V
	// occupied is set once the slot holds (or has held) a key;
	// tombstone additionally marks an occupied slot whose key was
	// deleted.
	occupied, tombstone bool
}

// Len returns the number of keys currently stored in m. Keys removed
// with Delete are not counted.
func (m *Hashmap[K, V]) Len() int {
return m.length
}

// mask returns len(m.entries)-1, the bitmask that position ANDs with a
// hash to turn it into a slot index. That only covers every slot when
// the table length is a power of two, which is what New and Set
// allocate. For a table with no slots the result is -1.
func (m *Hashmap[K, V]) mask() int {
return len(m.entries) - 1
}

// position returns the slot index at which probing for hash starts:
// the hash is mixed with the map's seed and then masked down to the
// table size.
func (m *Hashmap[K, V]) position(hash uint64) int {
	seeded := hash ^ m.seed
	return int(seeded) & m.mask()
}

// Set associates k with v in m, replacing any value already stored
// under k.
//
// Before inserting, the table is allocated with 4 slots if it has none,
// or doubled if the number of live keys has reached 90% of the slot
// count.
func (m *Hashmap[K, V]) Set(k K, v V) {
	switch slots := len(m.entries); {
	case slots == 0:
		m.resize(4)
	case m.length >= int(float64(slots)*0.9):
		m.resize(slots * 2)
	}
	m.set(m.hash(k), k, v)
}

// set stores v under k in the existing table using Robin Hood probing.
// hash must be the hash of k, and the table must have at least one slot
// that is empty or a tombstone.
//
// If k is already present its value is replaced; otherwise a new entry
// is added and m.length is incremented. While probing, an entry that is
// closer to its desired slot than the one being inserted is evicted and
// re-inserted further along. Tombstones are reused where doing so keeps
// the probe order intact.
func (m *Hashmap[K, V]) set(hash uint64, k K, v V) {
	position := m.position(hash)
	var (
		distance int
		// displaced is true once the entry being carried was evicted
		// from the table. Such an entry is known to be unique, so it
		// never needs to be compared against other keys.
		displaced bool
	)
	// reusable is the first tombstone that may legally hold the
	// caller's key, or -1. It is only filled once we know the key is
	// not stored further along the probe sequence; filling it eagerly
	// would insert the key a second time.
	reusable := -1
	for {
		existing := &m.entries[position]
		if !existing.occupied {
			if reusable >= 0 {
				existing = &m.entries[reusable]
			}
			m.fill(existing, hash, k, v)
			return
		}
		// A tombstone's key has been zeroed by Delete, so it must never
		// be compared against k.
		if !displaced && !existing.tombstone &&
			existing.hash == hash && m.equal(existing.key, k) {
			existing.value = v
			return
		}

		existingDistance := position - m.position(existing.hash)
		if existingDistance < 0 {
			existingDistance += len(m.entries)
		}
		switch {
		case distance > existingDistance:
			// k is further from its desired position than existing's
			// key, so k cannot be stored beyond this slot.
			if reusable >= 0 || existing.tombstone {
				if reusable >= 0 {
					existing = &m.entries[reusable]
				}
				m.fill(existing, hash, k, v)
				return
			}
			// Steal its spot and find a new place for existing.
			hash, existing.hash = existing.hash, hash
			k, existing.key = existing.key, k
			v, existing.value = existing.value, v
			distance = existingDistance
			displaced = true
		case distance == existingDistance && existing.tombstone:
			if displaced {
				m.fill(existing, hash, k, v)
				return
			}
			if reusable < 0 {
				reusable = position
			}
		}

		distance++
		position = (position + 1) & m.mask()
	}
}

// fill overwrites slot with a live entry for k and counts it in
// m.length.
func (m *Hashmap[K, V]) fill(slot *entry[K, V], hash uint64, k K, v V) {
	*slot = entry[K, V]{hash: hash, key: k, value: v, occupied: true}
	m.length++
}

// Get returns the value stored under k and true. If k is not in m it
// returns the zero value of V and false.
func (m *Hashmap[K, V]) Get(k K) (V, bool) {
	if found := m.getRef(k); found != nil {
		return found.value, true
	}
	var zero V
	return zero, false
}

// getRef returns a pointer to the live entry holding k, or nil if k is
// not in m. The pointer refers directly into m.entries.
func (m *Hashmap[K, V]) getRef(k K) *entry[K, V] {
	// A map created with size 0 has no table until the first Set;
	// indexing into it would panic.
	if len(m.entries) == 0 {
		return nil
	}
	hash := m.hash(k)
	position := m.position(hash)
	for distance := 0; ; distance++ {
		ent := &m.entries[position]
		if !ent.occupied {
			return nil
		}
		entDistance := position - m.position(ent.hash)
		if entDistance < 0 {
			entDistance += len(m.entries)
		}
		if distance > entDistance {
			// Our distance has exceeded this entry's distance, we
			// would have found our key by now if it was present.
			return nil
		}
		// Tombstones keep their hash but have a zeroed key. They must
		// be skipped rather than compared, otherwise a deleted key
		// whose zero value equals k would be reported as present.
		if !ent.tombstone && ent.hash == hash && m.equal(ent.key, k) {
			return ent
		}
		position = (position + 1) & m.mask()
	}
}

// Delete removes k from m. It does nothing if k is not present.
func (m *Hashmap[K, V]) Delete(k K) {
	ent := m.getRef(k)
	if ent == nil {
		return
	}
	// Turn the slot into a tombstone: the key and value are reset to
	// their zero values, while the hash is kept so that the slot's
	// probe distance can still be calculated. getRef only ever returns
	// occupied slots, so occupied stays set.
	*ent = entry[K, V]{hash: ent.hash, occupied: true, tombstone: true}
	m.length--
}

// resize replaces the table with a fresh one of size slots and
// re-inserts every live entry from the old table using its stored
// hash. Empty slots and tombstones are dropped.
func (m *Hashmap[K, V]) resize(size int) {
	previous := m.entries
	m.entries, m.length = make([]entry[K, V], size), 0
	for i := range previous {
		if old := &previous[i]; old.occupied && !old.tombstone {
			m.set(old.hash, old.key, old.value)
		}
	}
}
212 changes: 212 additions & 0 deletions hashmap/hashmap_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
// Copyright (c) 2019 Arista Networks, Inc.
// Use of this source code is governed by the Apache License 2.0
// that can be found in the COPYING file.

package hashmap

import (
"fmt"
"math/rand"
"strings"
"testing"

"github.com/aristanetworks/goarista/key"
)

// dumbHashable is a test key that wraps an arbitrary value.
type dumbHashable struct {
	dumb any
}

// Equal reports whether other is a dumbHashable wrapping the same
// value as d.
func (d dumbHashable) Equal(other interface{}) bool {
	o, ok := other.(dumbHashable)
	return ok && d.dumb == o.dumb
}

// Hash returns the same constant for every dumbHashable, so all such
// keys collide and the map has to tell them apart with Equal.
func (d dumbHashable) Hash() uint64 {
return 1234567890
}

func TestMapSetGet(t *testing.T) {
m := New[Hashable, any](0,
func(h Hashable) uint64 { return h.Hash() },
func(x, y Hashable) bool { return x.Equal(y) })
tests := []struct {
setkey interface{}
getkey interface{}
val interface{}
found bool
}{{
setkey: dumbHashable{dumb: "hashable1"},
getkey: dumbHashable{dumb: "hashable1"},
val: 1,
found: true,
}, {
getkey: dumbHashable{dumb: "hashable2"},
val: nil,
found: false,
}, {
setkey: dumbHashable{dumb: "hashable2"},
getkey: dumbHashable{dumb: "hashable2"},
val: 2,
found: true,
}, {
getkey: dumbHashable{dumb: "hashable42"},
val: nil,
found: false,
}, {
setkey: key.New(map[string]interface{}{"a": int32(1)}),
getkey: key.New(map[string]interface{}{"a": int32(1)}),
val: "foo",
found: true,
}, {
getkey: key.New(map[string]interface{}{"a": int32(2)}),
val: nil,
found: false,
}, {
setkey: key.New(map[string]interface{}{"a": int32(2)}),
getkey: key.New(map[string]interface{}{"a": int32(2)}),
val: "bar",
found: true,
}}
for _, tcase := range tests {
if tcase.setkey != nil {
m.Set(tcase.setkey.(Hashable), tcase.val)
}
val, found := m.Get(tcase.getkey.(Hashable))
if found != tcase.found {
t.Errorf("found is %t, but expected found %t", found, tcase.found)
}
if val != tcase.val {
t.Errorf("val is %v for key %v, but expected val %v", val, tcase.getkey, tcase.val)
}
}
t.Log(m.debug())
}

// BenchmarkMapGrow measures filling a fresh map with 150 keys, for
// key.Map, for a Hashmap that starts without a table, and for a
// Hashmap created with a size hint of 150.
func BenchmarkMapGrow(b *testing.B) {
	keys := make([]key.Key, 150)
	for j := range keys {
		keys[j] = key.New(map[string]interface{}{
			"foobar": 100,
			"baz":    j,
		})
	}

	hashOf := func(h Hashable) uint64 { return h.Hash() }
	sameKey := func(x, y Hashable) bool { return x.Equal(y) }
	// fillHashmap returns a sub-benchmark that repeatedly builds a
	// Hashmap created with the given size hint and inserts every key.
	fillHashmap := func(size uint) func(*testing.B) {
		return func(b *testing.B) {
			b.ReportAllocs()
			for i := 0; i < b.N; i++ {
				m := New[Hashable, any](size, hashOf, sameKey)
				for _, k := range keys {
					m.Set(k.(Hashable), "foobar")
				}
				if m.Len() != len(keys) {
					b.Fatal(m)
				}
			}
		}
	}

	b.Run("key.Map", func(b *testing.B) {
		b.ReportAllocs()
		for i := 0; i < b.N; i++ {
			m := key.NewMap()
			for _, k := range keys {
				m.Set(k, "foobar")
			}
			if m.Len() != len(keys) {
				b.Fatal(m)
			}
		}
	})
	b.Run("Hashmap", fillHashmap(0))
	b.Run("Hashmap-presize", fillHashmap(150))
}

// BenchmarkMapGet measures looking up all 150 keys, visited in a
// shuffled order, in a pre-filled key.Map and a pre-filled Hashmap.
// Filling the maps happens before the timer is reset.
func BenchmarkMapGet(b *testing.B) {
	keys := make([]key.Key, 150)
	for j := range keys {
		keys[j] = key.New(map[string]interface{}{
			"foobar": 100,
			"baz":    j,
		})
	}
	// Look keys up in a different order than they were inserted.
	shuffled := make([]key.Key, len(keys))
	copy(shuffled, keys)
	rand.Shuffle(len(shuffled), func(i, j int) {
		shuffled[i], shuffled[j] = shuffled[j], shuffled[i]
	})

	b.Run("key.Map", func(b *testing.B) {
		m := key.NewMap()
		for _, k := range keys {
			m.Set(k, "foobar")
		}
		b.ReportAllocs()
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			for _, k := range shuffled {
				if _, ok := m.Get(k); !ok {
					b.Fatal("didn't find key")
				}
			}
		}
	})
	b.Run("Hashmap", func(b *testing.B) {
		m := New[Hashable, any](0,
			func(h Hashable) uint64 { return h.Hash() },
			func(x, y Hashable) bool { return x.Equal(y) })
		for _, k := range keys {
			m.Set(k.(Hashable), "foobar")
		}
		b.ReportAllocs()
		b.ResetTimer()
		for i := 0; i < b.N; i++ {
			for _, k := range shuffled {
				if _, ok := m.Get(k.(Hashable)); !ok {
					b.Fatal("didn't find key")
				}
			}
		}
	})
}

// debug renders the table one slot per line as "<index> <distance>
// <key>". Empty slots print as <empty> with distance 0 and deleted
// slots as <tombstone>; distance is how far the slot is from the
// position its stored hash maps to.
func (m *Hashmap[K, V]) debug() string {
	var out strings.Builder
	for idx := range m.entries {
		slot := &m.entries[idx]
		label, dist := "<empty>", 0
		if slot.occupied {
			if slot.tombstone {
				label = "<tombstone>"
			} else {
				label = fmt.Sprint(slot.key)
			}
			dist = idx - m.position(slot.hash)
			if dist < 0 {
				// The probe sequence wrapped around the end of the table.
				dist += len(m.entries)
			}
		}
		fmt.Fprintf(&out, "%d %d %s\n", idx, dist, label)
	}
	return out.String()
}
Loading

0 comments on commit 4b6b742

Please sign in to comment.