You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
// 并行的运行子压缩情况
func (lm *levelManager) addSplits(cd *compactDef) {
cd.splits = cd.splits[:0]
// Let's say we have 10 tables in cd.bot and min width = 3. Then, we'll pick
// 0, 1, 2 (pick), 3, 4, 5 (pick), 6, 7, 8 (pick), 9 (pick, because last table).
// This gives us 4 picks for 10 tables.
// In an edge case, 142 tables in bottom led to 48 splits. That's too many splits, because it
// then uses up a lot of memory for table builder.
// We should keep it so we have at max 5 splits.
width := int(math.Ceil(float64(len(cd.bot)) / 5.0))
if width < 3 {
width = 3
}
skr := cd.thisRange
skr.extend(cd.nextRange)
addRange := func(right []byte) {
skr.right = utils.Copy(right)
cd.splits = append(cd.splits, skr)
skr.left = skr.right
}
for i, t := range cd.bot {
// last entry in bottom table.
// 最后一个range分配无穷大的空间
if i == len(cd.bot)-1 {
addRange([]byte{})
return
}
// 这里实现的是每(cd.bot/5)个合并成一个
if i%width == width-1 {
// 设置最大值为右区间
right := utils.KeyWithTs(utils.ParseKey(t.ss.MaxKey()), math.MaxUint64)
addRange(right)
}
}
}
func (lm *levelManager) subcompact(it utils.Iterator, kr keyRange, cd compactDef, inflightBuilders *utils.Throttle, res chan<- *table) {
for it.Valid() {
key := it.Item().Entry().Key // 我感觉是这里做了处理, 不在这个key区间的, 直接退出; 虽然这个key区间是根据 bot[] 划分而来;
if len(kr.right) > 0 && utils.CompareKeys(key, kr.right) >= 0 {
break
}
// It was true that it.Valid() at least once in the loop above, which means we
// called Add() at least once, and builder is not Empty().
if builder.empty() {
// Cleanup builder resources:
builder.finish()
builder.Close()
continue
}
if err := inflightBuilders.Do(); err != nil {
// Can't return from here, until I decrRef all the tables that I built so far.
break
}
// 充分发挥 ssd的并行 写入特性, 有疑问;
go func(builder *tableBuilder) {
defer inflightBuilders.Done(nil)
defer builder.Close()
var tbl *table
newFID := atomic.AddUint64(&lm.maxFID, 1) // compact的时候是没有memtable的,这里自增maxFID即可。
// TODO 这里的sst文件需要根据level大小变化
sstName := utils.FileNameSSTable(lm.opt.WorkDir, newFID)
tbl = OpenTable(lm, sstName, builder)
if tbl == nil {
return
}
res <- tbl
}(builder)
}
}
addSplits函数代码如下:
视频讲解当中有一个细节优化是在切片时,以key为粒度进行划分,目的是为了减少划分后table之间的key重叠,提高效率(我感觉除了l0层,应该也不会有key重叠),但是代码里面,for循环中还是以cd.bot中的table来进行遍历,这里感觉还是以table为粒度来进行划分,请问是这样的吗?
The text was updated successfully, but these errors were encountered: