Skip to content

Commit 639f052

Browse files
committed
sync2: implement database-backed sync based on FPTree
`fptree.FPTree` provides a sufficiently efficient data structure for performing range fingerprinting on data residing in database tables, speeding up the queries at the expense of some memory use. `dbset.DBSet` builds on `fptree.FPTree` and provides a database-backed implementation of the `multipeer.OrderedSet` interface.
1 parent f886141 commit 639f052

16 files changed

+5019
-0
lines changed

sync2/dbset/dbset.go

+275
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
package dbset
2+
3+
import (
4+
"fmt"
5+
"maps"
6+
"sync"
7+
"time"
8+
9+
"github.com/spacemeshos/go-spacemesh/sql"
10+
"github.com/spacemeshos/go-spacemesh/sync2/fptree"
11+
"github.com/spacemeshos/go-spacemesh/sync2/rangesync"
12+
"github.com/spacemeshos/go-spacemesh/sync2/sqlstore"
13+
)
14+
15+
// DBSet is an implementation of rangesync.OrderedSet that uses an SQL database
16+
// as its backing store. It uses an FPTree to perform efficient range queries.
17+
type DBSet struct {
18+
loadMtx sync.Mutex
19+
db sql.Executor
20+
ft *fptree.FPTree
21+
st *sqlstore.SyncedTable
22+
snapshot *sqlstore.SyncedTableSnapshot
23+
dbStore *fptree.DBBackedStore
24+
keyLen int
25+
maxDepth int
26+
received map[string]struct{}
27+
}
28+
29+
// Compile-time check that *DBSet satisfies rangesync.OrderedSet.
var _ rangesync.OrderedSet = (*DBSet)(nil)
30+
31+
// NewDBSet creates a new DBSet.
32+
func NewDBSet(
33+
db sql.Executor,
34+
st *sqlstore.SyncedTable,
35+
keyLen, maxDepth int,
36+
) *DBSet {
37+
return &DBSet{
38+
db: db,
39+
st: st,
40+
keyLen: keyLen,
41+
maxDepth: maxDepth,
42+
}
43+
}
44+
45+
func (d *DBSet) handleIDfromDB(stmt *sql.Statement) bool {
46+
id := make(rangesync.KeyBytes, d.keyLen)
47+
stmt.ColumnBytes(0, id[:])
48+
d.ft.AddStoredKey(id)
49+
return true
50+
}
51+
52+
// EnsureLoaded ensures that the DBSet is loaded and ready to be used.
53+
func (d *DBSet) EnsureLoaded() error {
54+
d.loadMtx.Lock()
55+
defer d.loadMtx.Unlock()
56+
if d.ft != nil {
57+
return nil
58+
}
59+
var err error
60+
d.snapshot, err = d.st.Snapshot(d.db)
61+
if err != nil {
62+
return fmt.Errorf("error taking snapshot: %w", err)
63+
}
64+
count, err := d.snapshot.LoadCount(d.db)
65+
if err != nil {
66+
return fmt.Errorf("error loading count: %w", err)
67+
}
68+
d.dbStore = fptree.NewDBBackedStore(d.db, d.snapshot, count, d.keyLen)
69+
d.ft = fptree.NewFPTree(count, d.dbStore, d.keyLen, d.maxDepth)
70+
return d.snapshot.Load(d.db, d.handleIDfromDB)
71+
}
72+
73+
// Received returns a sequence of all items that have been received.
74+
// Implements rangesync.OrderedSet.
75+
func (d *DBSet) Received() rangesync.SeqResult {
76+
return rangesync.SeqResult{
77+
Seq: func(yield func(k rangesync.KeyBytes) bool) {
78+
for k := range d.received {
79+
if !yield(rangesync.KeyBytes(k)) {
80+
return
81+
}
82+
}
83+
},
84+
Error: rangesync.NoSeqError,
85+
}
86+
}
87+
88+
// Add adds an item to the DBSet.
89+
// Implements rangesync.OrderedSet.
90+
func (d *DBSet) Add(k rangesync.KeyBytes) error {
91+
if has, err := d.Has(k); err != nil {
92+
return fmt.Errorf("checking if item exists: %w", err)
93+
} else if has {
94+
return nil
95+
}
96+
d.ft.RegisterKey(k)
97+
return nil
98+
}
99+
100+
// Receive handles a newly received item, arranging for it to be returned as part of the
101+
// sequence returned by Received.
102+
// Implements rangesync.OrderedSet.
103+
func (d *DBSet) Receive(k rangesync.KeyBytes) error {
104+
if d.received == nil {
105+
d.received = make(map[string]struct{})
106+
}
107+
d.received[string(k)] = struct{}{}
108+
return nil
109+
}
110+
111+
func (d *DBSet) firstItem() (rangesync.KeyBytes, error) {
112+
if err := d.EnsureLoaded(); err != nil {
113+
return nil, err
114+
}
115+
return d.ft.All().First()
116+
}
117+
118+
// GetRangeInfo returns information about the range of items in the DBSet.
119+
// Implements rangesync.OrderedSet.
120+
func (d *DBSet) GetRangeInfo(x, y rangesync.KeyBytes) (rangesync.RangeInfo, error) {
121+
if err := d.EnsureLoaded(); err != nil {
122+
return rangesync.RangeInfo{}, err
123+
}
124+
if d.ft.Count() == 0 {
125+
return rangesync.RangeInfo{
126+
Items: rangesync.EmptySeqResult(),
127+
}, nil
128+
}
129+
if x == nil || y == nil {
130+
if x != nil || y != nil {
131+
panic("BUG: GetRangeInfo called with one of x/y nil but not both")
132+
}
133+
var err error
134+
x, err = d.firstItem()
135+
if err != nil {
136+
return rangesync.RangeInfo{}, fmt.Errorf("getting first item: %w", err)
137+
}
138+
y = x
139+
}
140+
fpr, err := d.ft.FingerprintInterval(x, y, -1)
141+
if err != nil {
142+
return rangesync.RangeInfo{}, err
143+
}
144+
return rangesync.RangeInfo{
145+
Fingerprint: fpr.FP,
146+
Count: int(fpr.Count),
147+
Items: fpr.Items,
148+
}, nil
149+
}
150+
151+
// SplitRange splits the range of items in the DBSet into two parts,
152+
// returning information about eachn part and the middle item.
153+
// Implements rangesync.OrderedSet.
154+
func (d *DBSet) SplitRange(x, y rangesync.KeyBytes, count int) (rangesync.SplitInfo, error) {
155+
if count <= 0 {
156+
panic("BUG: bad split count")
157+
}
158+
159+
if err := d.EnsureLoaded(); err != nil {
160+
return rangesync.SplitInfo{}, err
161+
}
162+
163+
sr, err := d.ft.Split(x, y, count)
164+
if err != nil {
165+
return rangesync.SplitInfo{}, err
166+
}
167+
168+
return rangesync.SplitInfo{
169+
Parts: [2]rangesync.RangeInfo{
170+
{
171+
Fingerprint: sr.Part0.FP,
172+
Count: int(sr.Part0.Count),
173+
Items: sr.Part0.Items,
174+
},
175+
{
176+
Fingerprint: sr.Part1.FP,
177+
Count: int(sr.Part1.Count),
178+
Items: sr.Part1.Items,
179+
},
180+
},
181+
Middle: sr.Middle,
182+
}, nil
183+
}
184+
185+
// Items returns a sequence of all items in the DBSet.
186+
// Implements rangesync.OrderedSet.
187+
func (d *DBSet) Items() rangesync.SeqResult {
188+
if err := d.EnsureLoaded(); err != nil {
189+
return rangesync.ErrorSeqResult(err)
190+
}
191+
return d.ft.All()
192+
}
193+
194+
// Empty returns true if the DBSet is empty.
195+
// Implements rangesync.OrderedSet.
196+
func (d *DBSet) Empty() (bool, error) {
197+
if err := d.EnsureLoaded(); err != nil {
198+
return false, err
199+
}
200+
return d.ft.Count() == 0, nil
201+
}
202+
203+
// Advance advances the DBSet to the latest state of the underlying database table.
204+
func (d *DBSet) Advance() error {
205+
d.loadMtx.Lock()
206+
defer d.loadMtx.Unlock()
207+
if d.ft == nil {
208+
// FIXME
209+
panic("BUG: can't advance the DBItemStore before it's loaded")
210+
}
211+
oldSnapshot := d.snapshot
212+
var err error
213+
d.snapshot, err = d.st.Snapshot(d.db)
214+
if err != nil {
215+
return fmt.Errorf("error taking snapshot: %w", err)
216+
}
217+
d.dbStore.SetSnapshot(d.snapshot)
218+
return d.snapshot.LoadSinceSnapshot(d.db, oldSnapshot, d.handleIDfromDB)
219+
}
220+
221+
// Copy creates a copy of the DBSet.
222+
// Implements rangesync.OrderedSet.
223+
func (d *DBSet) Copy(syncScope bool) rangesync.OrderedSet {
224+
d.loadMtx.Lock()
225+
defer d.loadMtx.Unlock()
226+
if d.ft == nil {
227+
// FIXME
228+
panic("BUG: can't copy the DBItemStore before it's loaded")
229+
}
230+
ft := d.ft.Clone().(*fptree.FPTree)
231+
return &DBSet{
232+
db: d.db,
233+
ft: ft,
234+
st: d.st,
235+
keyLen: d.keyLen,
236+
maxDepth: d.maxDepth,
237+
dbStore: d.dbStore,
238+
received: maps.Clone(d.received),
239+
}
240+
}
241+
242+
// Has returns true if the DBSet contains the given item.
243+
func (d *DBSet) Has(k rangesync.KeyBytes) (bool, error) {
244+
if err := d.EnsureLoaded(); err != nil {
245+
return false, err
246+
}
247+
248+
// checkKey may have false positives, but not false negatives, and it's much
249+
// faster than querying the database
250+
if !d.ft.CheckKey(k) {
251+
return false, nil
252+
}
253+
254+
first, err := d.dbStore.From(k, 1).First()
255+
if err != nil {
256+
return false, err
257+
}
258+
return first.Compare(k) == 0, nil
259+
}
260+
261+
// Recent returns a sequence of items that have been added to the DBSet since the given time.
262+
func (d *DBSet) Recent(since time.Time) (rangesync.SeqResult, int) {
263+
return d.dbStore.Since(make(rangesync.KeyBytes, d.keyLen), since.UnixNano())
264+
}
265+
266+
// Release releases resources associated with the DBSet.
267+
func (d *DBSet) Release() error {
268+
d.loadMtx.Lock()
269+
defer d.loadMtx.Unlock()
270+
if d.ft != nil {
271+
d.ft.Release()
272+
d.ft = nil
273+
}
274+
return nil
275+
}

0 commit comments

Comments
 (0)