From 54c7f78d09320a179458a06ec7c9e0de3ee18e6c Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Sat, 11 Dec 2021 16:14:00 +0100 Subject: [PATCH 1/5] memdb: use object-oriented accessors for skiplist nodes --- leveldb/memdb/memdb.go | 172 ++++++++++++++++++++++++++++++----------- 1 file changed, 127 insertions(+), 45 deletions(-) diff --git a/leveldb/memdb/memdb.go b/leveldb/memdb/memdb.go index 824e47f5..6a9052e0 100644 --- a/leveldb/memdb/memdb.go +++ b/leveldb/memdb/memdb.go @@ -37,9 +37,8 @@ type dbIter struct { func (i *dbIter) fill(checkStart, checkLimit bool) bool { if i.node != 0 { - n := i.p.nodeData[i.node] - m := n + i.p.nodeData[i.node+nKey] - i.key = i.p.kvData[n:m] + node := i.p.nodeAt(i.node) + i.key = i.p.kvData[node.kStart():node.kEnd()] if i.slice != nil { switch { case checkLimit && i.slice.Limit != nil && i.p.cmp.Compare(i.key, i.slice.Limit) >= 0: @@ -49,7 +48,7 @@ func (i *dbIter) fill(checkStart, checkLimit bool) bool { goto bail } } - i.value = i.p.kvData[m : m+i.p.nodeData[i.node+nVal]] + i.value = i.p.kvData[node.vStart():node.vEnd()] return true } bail: @@ -74,7 +73,7 @@ func (i *dbIter) First() bool { if i.slice != nil && i.slice.Start != nil { i.node, _ = i.p.findGE(i.slice.Start, false) } else { - i.node = i.p.nodeData[nNext] + i.node = i.p.nodeAt(0).nextAt(0) } return i.fill(false, true) } @@ -127,7 +126,7 @@ func (i *dbIter) Next() bool { i.forward = true i.p.mu.RLock() defer i.p.mu.RUnlock() - i.node = i.p.nodeData[i.node+nNext] + i.node = i.p.nodeAt(i.node).nextAt(0) return i.fill(false, true) } @@ -178,19 +177,83 @@ const ( nNext ) +// node represents a node in the skiplist. It maps directly onto the nodeData +// backing array, and is not meant to be used as a separate entity (aside for when +// creating a new one). +// Thus, it's perfectly fine if the underlying array is overly large (since the exact size +// is not known before reading the height). 
+// Node data is laid out as follows: +// [0] : KV offset +// [1] : Key length +// [2] : Value length +// [3] : Height +// [3..height] : Next nodes +type node []int + +// kStart returns the start index for the key. +func (n node) kStart() int { + return n[0] +} + +// kEnd returns the start + length for the key. +func (n node) kEnd() int { + return n[0] + n[1] +} + +// kLen return the key length. +func (n node) kLen() int { + return n[1] +} + +// vStart return the offset for the value. +func (n node) vStart() int { + return n[0] + n[1] +} + +// vEnd return the offset + length for value. +func (n node) vEnd() int { + return n[0] + n[1] + n[2] +} + +// vLen returns the value length. +func (n node) vLen() int { + return n[2] +} + +// setKStart sets the key offset. +func (n node) setKStart(keyOffset int) node { + n[0] = keyOffset + return n +} + +// setVLen sets the value length. +func (n node) setVLen(size int) node { + n[2] = size + return n +} + +// height return the size of the next-tower. +func (n node) height() int { + return n[3] +} + +// nextAt return the item at the given height. +func (n node) nextAt(height int) int { + return n[4+height] +} + +// setNextAt sets the next item at the given height +func (n node) setNextAt(height int, node int) { + n[4+height] = node +} + // DB is an in-memory key/value database. type DB struct { cmp comparer.BasicComparer rnd *rand.Rand - mu sync.RWMutex - kvData []byte - // Node data: - // [0] : KV offset - // [1] : Key length - // [2] : Value length - // [3] : Height - // [3..height] : Next nodes + mu sync.RWMutex + kvData []byte nodeData []int prevNode [tMaxHeight]int maxHeight int @@ -207,16 +270,34 @@ func (p *DB) randHeight() (h int) { return } +// nodeAt returns the node at the given index. +func (p *DB) nodeAt(idx int) node { + return node(p.nodeData[idx:]) +} + +// newNode constructs a new node. Be careful -- this allocates a new slice, +// along with space to store the next, according to the height given. 
+// This node later needs to be written to the backing slice, making the original +// instance moot. +func newNode(kvOffset, kLen, vLen, height int) node { + buf := make([]int, 4+height) + buf[0] = kvOffset + buf[1] = kLen + buf[2] = vLen + buf[3] = height + return node(buf) +} + // Must hold RW-lock if prev == true, as it use shared prevNode slice. func (p *DB) findGE(key []byte, prev bool) (int, bool) { node := 0 h := p.maxHeight - 1 for { - next := p.nodeData[node+nNext+h] + next := p.nodeAt(node).nextAt(h) cmp := 1 if next != 0 { - o := p.nodeData[next] - cmp = p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key) + o := p.nodeAt(next) + cmp = p.cmp.Compare(p.kvData[o.kStart():o.kEnd()], key) } if cmp < 0 { // Keep searching in this list @@ -239,9 +320,9 @@ func (p *DB) findLT(key []byte) int { node := 0 h := p.maxHeight - 1 for { - next := p.nodeData[node+nNext+h] - o := p.nodeData[next] - if next == 0 || p.cmp.Compare(p.kvData[o:o+p.nodeData[next+nKey]], key) >= 0 { + next := p.nodeAt(node).nextAt(h) + o := p.nodeAt(next) + if next == 0 || p.cmp.Compare(p.kvData[o.kStart():o.kEnd()], key) >= 0 { if h == 0 { break } @@ -257,7 +338,7 @@ func (p *DB) findLast() int { node := 0 h := p.maxHeight - 1 for { - next := p.nodeData[node+nNext+h] + next := p.nodeAt(node).nextAt(h) if next == 0 { if h == 0 { break @@ -282,9 +363,10 @@ func (p *DB) Put(key []byte, value []byte) error { kvOffset := len(p.kvData) p.kvData = append(p.kvData, key...) p.kvData = append(p.kvData, value...) - p.nodeData[node] = kvOffset - m := p.nodeData[node+nVal] - p.nodeData[node+nVal] = len(value) + // since match is exact, there's no need to set the key size again + existing := p.nodeAt(node) + m := existing.vLen() + p.nodeAt(node).setKStart(kvOffset).setVLen(len(value)) p.kvSize += len(value) - m return nil } @@ -302,12 +384,13 @@ func (p *DB) Put(key []byte, value []byte) error { p.kvData = append(p.kvData, value...) 
// Node node := len(p.nodeData) - p.nodeData = append(p.nodeData, kvOffset, len(key), len(value), h) + newN := newNode(kvOffset, len(key), len(value), h) for i, n := range p.prevNode[:h] { - m := n + nNext + i - p.nodeData = append(p.nodeData, p.nodeData[m]) - p.nodeData[m] = node + prev := p.nodeAt(n) + newN.setNextAt(i, prev.nextAt(i)) + prev.setNextAt(i, node) } + p.nodeData = append(p.nodeData, newN...) p.kvSize += len(key) + len(value) p.n++ @@ -327,13 +410,13 @@ func (p *DB) Delete(key []byte) error { return ErrNotFound } - h := p.nodeData[node+nHeight] + todelete := p.nodeAt(node) + h := todelete.height() for i, n := range p.prevNode[:h] { - m := n + nNext + i - p.nodeData[m] = p.nodeData[p.nodeData[m]+nNext+i] + prev := p.nodeAt(n) + prev.setNextAt(i, todelete.nextAt(i)) } - - p.kvSize -= p.nodeData[node+nKey] + p.nodeData[node+nVal] + p.kvSize -= todelete.kLen() + todelete.vLen() p.n-- return nil } @@ -356,8 +439,8 @@ func (p *DB) Contains(key []byte) bool { func (p *DB) Get(key []byte) (value []byte, err error) { p.mu.RLock() if node, exact := p.findGE(key, false); exact { - o := p.nodeData[node] + p.nodeData[node+nKey] - value = p.kvData[o : o+p.nodeData[node+nVal]] + n := p.nodeAt(node) + value = p.kvData[n.vStart():n.vEnd()] } else { err = ErrNotFound } @@ -374,10 +457,9 @@ func (p *DB) Get(key []byte) (value []byte, err error) { func (p *DB) Find(key []byte) (rkey, value []byte, err error) { p.mu.RLock() if node, _ := p.findGE(key, false); node != 0 { - n := p.nodeData[node] - m := n + p.nodeData[node+nKey] - rkey = p.kvData[n:m] - value = p.kvData[m : m+p.nodeData[node+nVal]] + n := p.nodeAt(node) + rkey = p.kvData[n.kStart():n.kEnd()] + value = p.kvData[n.vStart():n.vEnd()] } else { err = ErrNotFound } @@ -446,15 +528,15 @@ func (p *DB) Reset() { p.n = 0 p.kvSize = 0 p.kvData = p.kvData[:0] - p.nodeData = p.nodeData[:nNext+tMaxHeight] - p.nodeData[nKV] = 0 - p.nodeData[nKey] = 0 - p.nodeData[nVal] = 0 - p.nodeData[nHeight] = tMaxHeight + + 
p.nodeData = p.nodeData[:0] + // Add empty first element + zero := newNode(0, 0, 0, tMaxHeight) for n := 0; n < tMaxHeight; n++ { - p.nodeData[nNext+n] = 0 + zero.setNextAt(n, 0) p.prevNode[n] = 0 } + p.nodeData = append(p.nodeData, zero...) p.mu.Unlock() } From 667fbf85ec65dfa4907b450c8b39104b97a9623f Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Sat, 11 Dec 2021 21:37:18 +0100 Subject: [PATCH 2/5] memdb: define backing-type --- leveldb/memdb/memdb.go | 63 ++++++++++++++++++++++++++++-------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/leveldb/memdb/memdb.go b/leveldb/memdb/memdb.go index 6a9052e0..60a2c0d4 100644 --- a/leveldb/memdb/memdb.go +++ b/leveldb/memdb/memdb.go @@ -8,8 +8,10 @@ package memdb import ( + "fmt" "math/rand" "sync" + "unsafe" "github.com/syndtr/goleveldb/leveldb/comparer" "github.com/syndtr/goleveldb/leveldb/errors" @@ -177,6 +179,9 @@ const ( nNext ) +// The backing representation for the nodeData slice +type nodeInt int + // node represents a node in the skiplist. It maps directly onto the nodeData // backing array, and is not meant to be used as a separate entity (aside for when // creating a new one). @@ -188,63 +193,63 @@ const ( // [2] : Value length // [3] : Height // [3..height] : Next nodes -type node []int +type node []nodeInt // kStart returns the start index for the key. func (n node) kStart() int { - return n[0] + return int(n[0]) } // kEnd returns the start + length for the key. func (n node) kEnd() int { - return n[0] + n[1] + return int(n[0] + n[1]) } // kLen return the key length. func (n node) kLen() int { - return n[1] + return int(n[1]) } // vStart return the offset for the value. func (n node) vStart() int { - return n[0] + n[1] + return int(n[0] + n[1]) } // vEnd return the offset + length for value. func (n node) vEnd() int { - return n[0] + n[1] + n[2] + return int(n[0] + n[1] + n[2]) } // vLen returns the value length. 
func (n node) vLen() int { - return n[2] + return int(n[2]) } // setKStart sets the key offset. func (n node) setKStart(keyOffset int) node { - n[0] = keyOffset + n[0] = nodeInt(keyOffset) return n } // setVLen sets the value length. func (n node) setVLen(size int) node { - n[2] = size + n[2] = nodeInt(size) return n } // height return the size of the next-tower. func (n node) height() int { - return n[3] + return int(n[3]) } // nextAt return the item at the given height. func (n node) nextAt(height int) int { - return n[4+height] + return int(n[4+height]) } // setNextAt sets the next item at the given height func (n node) setNextAt(height int, node int) { - n[4+height] = node + n[4+height] = nodeInt(node) } // DB is an in-memory key/value database. @@ -254,7 +259,7 @@ type DB struct { mu sync.RWMutex kvData []byte - nodeData []int + nodeData []nodeInt prevNode [tMaxHeight]int maxHeight int n int @@ -280,11 +285,11 @@ func (p *DB) nodeAt(idx int) node { // This node later needs to be written to the backing slice, making the original // instance moot. func newNode(kvOffset, kLen, vLen, height int) node { - buf := make([]int, 4+height) - buf[0] = kvOffset - buf[1] = kLen - buf[2] = vLen - buf[3] = height + buf := make([]nodeInt, 4+height) + buf[0] = nodeInt(kvOffset) + buf[1] = nodeInt(kLen) + buf[2] = nodeInt(vLen) + buf[3] = nodeInt(height) return node(buf) } @@ -554,8 +559,26 @@ func New(cmp comparer.BasicComparer, capacity int) *DB { rnd: rand.New(rand.NewSource(0xdeadbeef)), maxHeight: 1, kvData: make([]byte, 0, capacity), - nodeData: make([]int, 4+tMaxHeight), } - p.nodeData[nHeight] = tMaxHeight + // Add empty first element + zero := newNode(0, 0, 0, tMaxHeight) + for n := 0; n < tMaxHeight; n++ { + zero.setNextAt(n, 0) + } + p.nodeData = append(p.nodeData, zero...) 
return p } + +func (p *DB) Stats() string { + p.mu.RLock() + defer p.mu.RUnlock() + dataSize := len(p.kvData) + metadataSize := len(p.nodeData) * int(unsafe.Sizeof(nodeInt(0))) + return fmt.Sprintf(`keyvalue size: %d +metadata size: %d +item count: %d +data/metadata ratio: %.02f +average kv item size: %.02f +`, dataSize, metadataSize, p.n, + float64(dataSize)/float64(metadataSize+1), float64(dataSize)/float64(p.n)) +} From 081ba6cf9f694f3497dd8cd325a914a70ebe80a2 Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Sat, 11 Dec 2021 23:02:30 +0100 Subject: [PATCH 3/5] memdb: make tests use accessors --- leveldb/memdb/memdb_test.go | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/leveldb/memdb/memdb_test.go b/leveldb/memdb/memdb_test.go index 3f0a31e4..a32ecfa0 100644 --- a/leveldb/memdb/memdb_test.go +++ b/leveldb/memdb/memdb_test.go @@ -19,10 +19,9 @@ import ( func (p *DB) TestFindLT(key []byte) (rkey, value []byte, err error) { p.mu.RLock() if node := p.findLT(key); node != 0 { - n := p.nodeData[node] - m := n + p.nodeData[node+nKey] - rkey = p.kvData[n:m] - value = p.kvData[m : m+p.nodeData[node+nVal]] + n := p.nodeAt(node) + rkey = p.kvData[n.kStart():n.kEnd()] + value = p.kvData[n.vStart():n.vEnd()] } else { err = ErrNotFound } @@ -33,10 +32,9 @@ func (p *DB) TestFindLT(key []byte) (rkey, value []byte, err error) { func (p *DB) TestFindLast() (rkey, value []byte, err error) { p.mu.RLock() if node := p.findLast(); node != 0 { - n := p.nodeData[node] - m := n + p.nodeData[node+nKey] - rkey = p.kvData[n:m] - value = p.kvData[m : m+p.nodeData[node+nVal]] + n := p.nodeAt(node) + rkey = p.kvData[n.kStart():n.kEnd()] + value = p.kvData[n.vStart():n.vEnd()] } else { err = ErrNotFound } From de5eb3fc8a51cfd0eca307f79ce9e25b45ca8ead Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Sat, 11 Dec 2021 23:21:47 +0100 Subject: [PATCH 4/5] memdb: remove unused constants --- leveldb/memdb/memdb.go | 8 -------- 1 file changed, 8 deletions(-)
diff --git a/leveldb/memdb/memdb.go b/leveldb/memdb/memdb.go index 60a2c0d4..5d830d3e 100644 --- a/leveldb/memdb/memdb.go +++ b/leveldb/memdb/memdb.go @@ -171,14 +171,6 @@ func (i *dbIter) Release() { } } -const ( - nKV = iota - nKey - nVal - nHeight - nNext -) - // The backing representation for the nodeData slice type nodeInt int From afed0b20c927ac21c9a08746fe9ac5dedfd5ff0e Mon Sep 17 00:00:00 2001 From: Martin Holst Swende Date: Mon, 13 Dec 2021 10:05:17 +0100 Subject: [PATCH 5/5] memdb: minor refactor --- leveldb/memdb/memdb.go | 98 +++-------------------------------------- leveldb/memdb/node.go | 99 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 93 deletions(-) create mode 100644 leveldb/memdb/node.go diff --git a/leveldb/memdb/memdb.go b/leveldb/memdb/memdb.go index 5d830d3e..2c7bf75f 100644 --- a/leveldb/memdb/memdb.go +++ b/leveldb/memdb/memdb.go @@ -25,7 +25,10 @@ var ( ErrIterReleased = errors.New("leveldb/memdb: iterator released") ) -const tMaxHeight = 12 +const ( + tMaxHeight = 12 // max height of a skiplist 'tower' + branching = 4 // branching factor for the skiplist +) type dbIter struct { util.BasicReleaser @@ -171,79 +174,6 @@ func (i *dbIter) Release() { } } -// The backing representation for the nodeData slice -type nodeInt int - -// node represents a node in the skiplist. It maps directly onto the nodeData -// backing array, and is not meant to be used as a separate entity (aside for when -// creating a new one). -// Thus, it's perfectly fine if the underlying array is overly large (since the exact size -// is not known before reading the height). -// Node data is laid out as follows: -// [0] : KV offset -// [1] : Key length -// [2] : Value length -// [3] : Height -// [3..height] : Next nodes -type node []nodeInt - -// kStart returns the start index for the key. -func (n node) kStart() int { - return int(n[0]) -} - -// kEnd returns the start + length for the key. 
-func (n node) kEnd() int { - return int(n[0] + n[1]) -} - -// kLen return the key length. -func (n node) kLen() int { - return int(n[1]) -} - -// vStart return the offset for the value. -func (n node) vStart() int { - return int(n[0] + n[1]) -} - -// vEnd return the offset + length for value. -func (n node) vEnd() int { - return int(n[0] + n[1] + n[2]) -} - -// vLen returns the value length. -func (n node) vLen() int { - return int(n[2]) -} - -// setKStart sets the key offset. -func (n node) setKStart(keyOffset int) node { - n[0] = nodeInt(keyOffset) - return n -} - -// setVLen sets the value length. -func (n node) setVLen(size int) node { - n[2] = nodeInt(size) - return n -} - -// height return the size of the next-tower. -func (n node) height() int { - return int(n[3]) -} - -// nextAt return the item at the given height. -func (n node) nextAt(height int) int { - return int(n[4+height]) -} - -// setNextAt sets the next item at the given height -func (n node) setNextAt(height int, node int) { - n[4+height] = nodeInt(node) -} - // DB is an in-memory key/value database. type DB struct { cmp comparer.BasicComparer @@ -259,7 +189,6 @@ type DB struct { } func (p *DB) randHeight() (h int) { - const branching = 4 h = 1 for h < tMaxHeight && p.rnd.Int()%branching == 0 { h++ @@ -267,24 +196,6 @@ func (p *DB) randHeight() (h int) { return } -// nodeAt returns the node at the given index. -func (p *DB) nodeAt(idx int) node { - return node(p.nodeData[idx:]) -} - -// newNode constructs a new node. Be careful -- this allocates a new slice, -// along with space to store the next, according to the height given. -// This node later needs to be written to the backing slice, making the original -// instance moot. 
-func newNode(kvOffset, kLen, vLen, height int) node { - buf := make([]nodeInt, 4+height) - buf[0] = nodeInt(kvOffset) - buf[1] = nodeInt(kLen) - buf[2] = nodeInt(vLen) - buf[3] = nodeInt(height) - return node(buf) -} - // Must hold RW-lock if prev == true, as it use shared prevNode slice. func (p *DB) findGE(key []byte, prev bool) (int, bool) { node := 0 @@ -561,6 +472,7 @@ func New(cmp comparer.BasicComparer, capacity int) *DB { return p } +// Stats returns some memdb runtime information. func (p *DB) Stats() string { p.mu.RLock() defer p.mu.RUnlock() diff --git a/leveldb/memdb/node.go b/leveldb/memdb/node.go new file mode 100644 index 00000000..79be848c --- /dev/null +++ b/leveldb/memdb/node.go @@ -0,0 +1,99 @@ +// Copyright (c) 2021, Suryandaru Triandana +// All rights reserved. +// +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Package memdb provides in-memory key/value database implementation. +package memdb + +// The backing representation for the nodeData slice +type nodeInt int + +// node represents a node in the skiplist. It maps directly onto the nodeData +// backing array, and is not meant to be used as a separate entity (aside from when +// creating a new one). +// Thus, it's perfectly fine if the underlying array is overly large (since the exact size +// is not known before reading the height). +// Node data is laid out as follows: +// [0] : KV offset +// [1] : Key length +// [2] : Value length +// [3] : Height +// [4..4+height) : Next nodes +type node []nodeInt + +// newNode constructs a new node. Be careful -- this allocates a new slice, +// along with space to store the next, according to the height given. +// This node later needs to be written to the backing slice, making the original +// instance moot.
+func newNode(kvOffset, kLen, vLen, height int) node { + buf := make([]nodeInt, 4+height) + buf[0] = nodeInt(kvOffset) + buf[1] = nodeInt(kLen) + buf[2] = nodeInt(vLen) + buf[3] = nodeInt(height) + return node(buf) +} + +// kStart returns the start index for the key. +func (n node) kStart() int { + return int(n[0]) +} + +// kEnd returns the start + length for the key. +func (n node) kEnd() int { + return int(n[0] + n[1]) +} + +// kLen returns the key length. +func (n node) kLen() int { + return int(n[1]) +} + +// vStart returns the offset for the value. +func (n node) vStart() int { + return int(n[0] + n[1]) +} + +// vEnd returns the offset + length for the value. +func (n node) vEnd() int { + return int(n[0] + n[1] + n[2]) +} + +// vLen returns the value length. +func (n node) vLen() int { + return int(n[2]) +} + +// setKStart sets the key offset. +func (n node) setKStart(keyOffset int) node { + n[0] = nodeInt(keyOffset) + return n +} + +// setVLen sets the value length. +func (n node) setVLen(size int) node { + n[2] = nodeInt(size) + return n +} + +// height returns the size of the next-tower. +func (n node) height() int { + return int(n[3]) +} + +// nextAt returns the item at the given height. +func (n node) nextAt(height int) int { + return int(n[4+height]) +} + +// setNextAt sets the next item at the given height. +func (n node) setNextAt(height int, node int) { + n[4+height] = nodeInt(node) +} + +// nodeAt returns the node at the given index. +func (p *DB) nodeAt(idx int) node { + return node(p.nodeData[idx:]) +}