Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reclaim while rebalance #292

Merged
merged 4 commits into from
Jun 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 23 additions & 44 deletions dependencies/lmdb/libraries/liblmdb/mdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -1656,7 +1656,6 @@ struct MDB_env {
# define me_freelist_written_end me_pgstate.mf_written_end
unsigned int me_maxfreepgs_to_load; /**< max freelist entries to load into memory */
unsigned int me_maxfreepgs_to_retain; /**< max freelist entries to retain in memory */
int me_freelist_state; /**< state of writing freelist (if it is being deleted) */
MDB_page *me_dpages; /**< list of malloc'd blocks for re-use */
/** IDL of pages that became unused in a write txn */
MDB_IDL me_free_pgs;
Expand Down Expand Up @@ -2856,7 +2855,6 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
load_more:
if (op == MDB_SET_RANGE) { /* 1st iteration */
/* Prepare to fetch more and coalesce */
if (env->me_freelist_state & MDB_FREELIST_DELETING) break;
oldest = env->me_pgoldest;
mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
#if (MDB_DEVEL) & 2 /* "& 2" so MDB_DEVEL=1 won't hide bugs breaking freeDB */
Expand All @@ -2883,20 +2881,6 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
key.mv_size = sizeof(last);
rc = mdb_cursor_get(&m2, &key, NULL, op);
op = MDB_NEXT; // now iterate forwards through the txns of the free list
if (rc) {
if (rc == MDB_NOTFOUND) { // if not found, go to the beginning of the range and look for older txns
if (env->me_freelist_state & MDB_FREELIST_DELETING) {
//fprintf(stderr, "Skipping unsafe backwards iteration 1\n");
break; // but can't iterate backwards in deletion mode
}
mdb_cursor_last(&m2, &key, NULL);
env->me_freelist_end = oldest;
op = MDB_PREV;
rc = mdb_cursor_get(&m2, &key, NULL, op);
}
if (rc && rc != MDB_NOTFOUND) goto fail;
}
mdb_cassert(&m2, key.mv_size > 0);
last = *(txnid_t *) key.mv_data;
} else {
// no more transactions to read going forward through newest, we are now going
Expand All @@ -2911,11 +2895,6 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
}
} else {
// we are now iterating through the free list entries
// first, we need to check if we are in free-list deletion mode, in which case it is not safe to iterate backwards and we have to bail out
if (op == MDB_PREV && (env->me_freelist_state & MDB_FREELIST_DELETING)) {
//fprintf(stderr, "Skipping unsafe backwards iteration 1\n");
break;
}
// now iterate
rc = mdb_cursor_get(&m2, &key, NULL, op);
if (rc && rc != MDB_NOTFOUND)
Expand All @@ -2928,13 +2907,12 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
if (op == MDB_NEXT) {
// iterating forward from the freelist range to find newer transactions
if (last >= oldest || rc == MDB_NOTFOUND) {
if (!rc) env->me_freelist_end = oldest;
env->me_freelist_end = oldest;
// no more newer transactions, go to the beginning of the range and look for older txns
if (env->me_freelist_state & MDB_FREELIST_DELETING) break;
op = MDB_SET_RANGE;
if (env->me_freelist_start <= 1) break; // should be no zero entry, break out
last = env->me_freelist_start - 1;
key.mv_data = &last; // start at the end of the freelist and read newer transactions free pages
key.mv_data = &last; // start at the start of the freelist and read older transactions free pages
key.mv_size = sizeof(last);
mdb_cursor_init(&m2, txn, FREE_DBI, NULL);
rc = mdb_cursor_get(&m2, &key, NULL, op);
Expand All @@ -2946,8 +2924,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
if (rc == MDB_NOTFOUND) break;
else goto fail;
}
}
goto fail;
} else goto fail;
}
mdb_cassert(&m2, key.mv_size > 0);
last = *(txnid_t*)key.mv_data;
Expand All @@ -2956,21 +2933,6 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
rc = MDB_NOTFOUND;
goto fail;
}
if (last >= env->me_freelist_start) {
// go to previous entry, through next iteration
rc = mdb_cursor_get(&m2, &key, NULL, op);
if (rc) {
if (rc == MDB_NOTFOUND) {
// reached the very start, mark it as 1
env->me_freelist_start = 1;
break;
}
goto fail;
} else
mdb_cassert(&m2, key.mv_size > 0);
last = *(txnid_t*)key.mv_data;
}
env->me_freelist_start = last;
} else
env->me_freelist_end = last + 1;
} else {
Expand All @@ -2979,6 +2941,26 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp)
env->me_freelist_start = 1;
break;
}
}
if (op == MDB_PREV) {
// move to the previous entry until we are before the recorded freelist start (me_freelist_start).
// note that occasionally this might take multiple iterations if we are in the middle of a
// rebalance and a node is being moved
while (last >= env->me_freelist_start) {
// go to previous entry, through prev iteration
rc = mdb_cursor_get(&m2, &key, NULL, MDB_PREV);
if (rc) {
if (rc == MDB_NOTFOUND) {
// reached the very start, mark it as 1
env->me_freelist_start = 1;
break;
}
goto fail;
} else
mdb_cassert(&m2, key.mv_size > 0);
last = *(txnid_t *) key.mv_data;
}
if (rc == MDB_NOTFOUND) break;
env->me_freelist_start = last;
}
if (!last) {
Expand Down Expand Up @@ -4288,9 +4270,7 @@ mdb_freelist_save(MDB_txn *txn)
total_room = head_room = 0;
mdb_tassert(txn, head_id >= env->me_freelist_start);
//fprintf(stderr, "Deleting free list record %u\n", head_id);
env->me_freelist_state = MDB_FREELIST_DELETING; // signal that we are deleting from the freelist, which means we can't iterate backwards
rc = mdb_cursor_del(&mc, 0);
env->me_freelist_state = 0;
if (rc) {
last_error = "Attempting to delete free-space record";
return rc;
Expand Down Expand Up @@ -10509,7 +10489,6 @@ mdb_node_move(MDB_cursor *csrc, MDB_cursor *cdst, int fromleft)
if ((rc = mdb_page_touch(csrc)) ||
(rc = mdb_page_touch(cdst)))
return rc;

if (IS_LEAF2(csrc->mc_pg[csrc->mc_top])) {
key.mv_size = csrc->mc_db->md_pad;
key.mv_data = LEAF2KEY(csrc->mc_pg[csrc->mc_top], csrc->mc_ki[csrc->mc_top], key.mv_size);
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
"dependencies": {
"msgpackr": "^1.9.9",
"node-addon-api": "^6.1.0",
"node-gyp-build-optional-packages": "5.1.1",
"node-gyp-build-optional-packages": "5.2.0",
"ordered-binary": "^1.4.1",
"weak-lru-cache": "^1.2.2"
},
Expand Down
11 changes: 9 additions & 2 deletions test/index.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -414,11 +414,18 @@ describe('lmdb-js', function () {
let promise;
let additive = 'this is more text';
for (let i = 0; i < 7; i++) additive += additive;
let read_txn = db.useReadTransaction();
for (let i = 0; i < 5000; i++) {
if (Math.random() < 0.3) {
read_txn.done();
read_txn = db.useReadTransaction();
}
let text = 'this is a test';
while (random() < 0.95) text += additive;
promise = db.put(i % 10, text);
if (i % 16 == 0) {
if (random() < 0.4) promise = db.remove(i % 40);
else promise = db.put(i % 40, text);

if (i % 2 == 0) {
await promise;
}
}
Expand Down
Loading