diff --git a/README.md b/README.md index 02e631c..e0cbfbd 100644 --- a/README.md +++ b/README.md @@ -382,11 +382,11 @@ for await (const [key, value] of db.iterator()) { > :pushpin: The key structure is equal to that of [`subleveldown`](https://github.com/Level/subleveldown) which offered sublevels before they were built-in to `abstract-level`. This means that an `abstract-level` sublevel can read sublevels previously created with (and populated by) `subleveldown`. -Internally, sublevels operate on keys that are either a string, Buffer or Uint8Array, depending on parent database and choice of encoding. Which is to say: binary keys are fully supported. The `name` must however always be a string and can only contain ASCII characters. +Internally, sublevels operate on keys that are either a string, Buffer or Uint8Array, depending on parent database and choice of encoding. Which is to say: binary keys are fully supported. The `name` must however always be a string. The optional `options` object may contain: -- `separator` (string, default: `'!'`): Character for separating sublevel names from user keys and each other. Must sort before characters used in `name`. An error will be thrown if that's not the case. +- `separator` (string, default: `'!'`): Character for separating sublevel names from user keys and each other. - `keyEncoding` (string or object, default `'utf8'`): encoding to use for keys - `valueEncoding` (string or object, default `'utf8'`): encoding to use for values. @@ -1313,10 +1313,6 @@ Data could not be read (from an underlying store) due to a corruption. Data could not be read (from an underlying store) due to an input/output error, for example from the filesystem. -#### `LEVEL_INVALID_PREFIX` - -When a sublevel prefix contains characters outside of the supported byte range. - #### `LEVEL_NOT_SUPPORTED` When a module needs a certain feature, typically as indicated by `db.supports`, but that feature is not available on a database argument or other. For example, some kind of plugin may depend on snapshots: diff --git a/lib/abstract-sublevel.js b/lib/abstract-sublevel.js index 6b102b1..ee54c8c 100644 --- a/lib/abstract-sublevel.js +++ b/lib/abstract-sublevel.js @@ -1,6 +1,5 @@ 'use strict' -const ModuleError = require('module-error') const { Buffer } = require('buffer') || {} const { AbstractSublevelIterator, @@ -38,18 +37,11 @@ module.exports = function ({ AbstractLevel }) { const { separator, manifest, ...forward } = AbstractSublevel.defaults(options) const names = [].concat(name).map(name => trim(name, separator)) - // Reserve one character between separator and name to give us an upper bound + // Reserve one character between separator and name to give us an upper bound, by + // default '"'. Keys should sort like ['!a!', '!a!!a!', '!a"', '!aa!', '!b!'] const reserved = separator.charCodeAt(0) + 1 const root = db[kRoot] || db - // Keys should sort like ['!a!', '!a!!a!', '!a"', '!aa!', '!b!']. - // Use ASCII for consistent length between string, Buffer and Uint8Array - if (!names.every(name => textEncoder.encode(name).every(x => x > reserved && x < 127))) { - throw new ModuleError(`Sublevel name must use bytes > ${reserved} < ${127}`, { - code: 'LEVEL_INVALID_PREFIX' - }) - } - super(mergeManifests(db, manifest), forward) const localPrefix = names.map(name => separator + name + separator).join('') @@ -179,14 +171,22 @@ module.exports = function ({ AbstractLevel }) { // TODO (refactor): move to AbstractLevel this.#prefixRange(options, options.keyEncoding) const iterator = this[kRoot].iterator(options) - const unfix = this.#unfix.get(this.#globalPrefix.utf8.length, options.keyEncoding) + const unfix = this.#unfix.get( + this.#globalPrefix.utf8.length, + this.#globalPrefix.view.byteLength, + options.keyEncoding + ) return new AbstractSublevelIterator(this, options, iterator, unfix) } _keys (options) { this.#prefixRange(options, options.keyEncoding) const iterator = this[kRoot].keys(options) - const unfix = this.#unfix.get(this.#globalPrefix.utf8.length, options.keyEncoding) + const unfix = this.#unfix.get( + this.#globalPrefix.utf8.length, + this.#globalPrefix.view.byteLength, + options.keyEncoding + ) return new AbstractSublevelKeyIterator(this, options, iterator, unfix) } @@ -252,20 +252,24 @@ class Unfixer { this.cache = new Map() } - get (prefixLength, keyFormat) { + get (stringLength, byteLength, keyFormat) { let unfix = this.cache.get(keyFormat) if (unfix === undefined) { if (keyFormat === 'view') { - unfix = function (prefixLength, key) { + unfix = function (byteLength, key) { // Avoid Uint8Array#slice() because it copies - return key.subarray(prefixLength) - }.bind(null, prefixLength) + return key.subarray(byteLength) + }.bind(null, byteLength) + } else if (keyFormat === 'utf8') { + unfix = function (stringLength, key) { + return key.slice(stringLength) + }.bind(null, stringLength) } else { - unfix = function (prefixLength, key) { + unfix = function (byteLength, key) { // Avoid Buffer#subarray() because it's slow - return key.slice(prefixLength) - }.bind(null, prefixLength) + return key.slice(byteLength) + }.bind(null, byteLength) } this.cache.set(keyFormat, unfix) diff --git a/test/self/sublevel-test.js b/test/self/sublevel-test.js index 9ec4a4a..6dbb77d 100644 --- a/test/self/sublevel-test.js +++ b/test/self/sublevel-test.js @@ -175,15 +175,6 @@ test('sublevel name and options', function (t) { t.end() }) - t.test('invalid sublevel prefix', function (t) { - t.throws(() => new NoopLevel().sublevel('foo\x05'), (err) => err.code === 'LEVEL_INVALID_PREFIX') - t.throws(() => new NoopLevel().sublevel('foo\xff'), (err) => err.code === 'LEVEL_INVALID_PREFIX') - t.throws(() => new NoopLevel().sublevel(['ok', 'foo\xff']), (err) => err.code === 'LEVEL_INVALID_PREFIX') - t.throws(() => new NoopLevel().sublevel('foo!', { separator: '@' }), (err) => err.code === 'LEVEL_INVALID_PREFIX') - t.throws(() => new NoopLevel().sublevel(['ok', 'foo!'], { separator: '@' }), (err) => err.code === 'LEVEL_INVALID_PREFIX') - t.end() - }) - // See https://github.com/Level/subleveldown/issues/78 t.test('doubly nested sublevel has correct prefix', async function (t) { t.plan(1) diff --git a/test/sublevel-test.js b/test/sublevel-test.js index 7fc7336..8df084f 100644 --- a/test/sublevel-test.js +++ b/test/sublevel-test.js @@ -206,4 +206,20 @@ exports.all = function (test, testCommon) { }) } } + + test('sublevel name with unicode', async function (t) { + const db = testCommon.factory() + const name = '🐄' + const sub = db.sublevel(name) + + // To illustrate why this test matters. We would remove too many + // characters from the prefixed key if we use the wrong length. + t.is(name.length, 2) + t.is(new TextEncoder().encode(name).byteLength, 4) + + await sub.put('a', 'a') + t.same(await sub.keys().all(), ['a'], 'correctly removed prefix') + + return db.close() + }) }