From 8e57fdd2fe712426bd71f5bd5461061053e83764 Mon Sep 17 00:00:00 2001 From: Rod Vagg Date: Thu, 14 Jan 2021 21:55:29 +1100 Subject: [PATCH] feat: CID.inspectBytes() and CID.decodeFirst() --- src/cid.js | 100 ++++++++++++++++++++++------- test/fixtures/invalid-multihash.js | 5 ++ test/test-cid.js | 78 ++++++++++++++++++++-- 3 files changed, 156 insertions(+), 27 deletions(-) diff --git a/src/cid.js b/src/cid.js index 88c7b4a6..cd5fbb54 100644 --- a/src/cid.js +++ b/src/cid.js @@ -2,6 +2,7 @@ import * as varint from './varint.js' import * as Digest from './hashes/digest.js' import { base58btc } from './bases/base58.js' import { base32 } from './bases/base32.js' +import { coerce } from './bytes.js' /** * @typedef {import('./hashes/interface').MultihashDigest} MultihashDigest @@ -265,31 +266,86 @@ export default class CID { } /** - * Takes cid in a binary representation and a `base` encoder that will be used - * for default cid serialization. + * Decode a CID from its binary representation. The byte array must contain + * only the CID with no additional bytes. * - * Throws if supplied base encoder is incompatible (CIDv0 is only compatible - * with `base58btc` encoder). - * @param {Uint8Array} cid + * An error will be thrown if the bytes provided do not contain a valid + * binary representation of a CID. 
+ * + * @param {Uint8Array} bytes + * @returns {CID} */ - static decode (cid) { - const [version, offset] = varint.decode(cid) - switch (version) { - // CIDv0 - case 18: { - const multihash = Digest.decode(cid) - return CID.createV0(multihash) - } - // CIDv1 - case 1: { - const [code, length] = varint.decode(cid.subarray(offset)) - const digest = Digest.decode(cid.subarray(offset + length)) - return CID.createV1(code, digest) - } - default: { - throw new RangeError(`Invalid CID version ${version}`) - } + static decode (bytes) { + const [cid, remainder] = CID.decodeFirst(bytes) + if (remainder.length) { + throw new Error('Incorrect length') + } + return cid + } + + /** + * Decode a CID from its binary representation at the beginning of a byte + * array. + * + * Returns an array with the first element containing the CID and the second + * element containing the remainder of the original byte array. The remainder + * will be a zero-length byte array if the provided bytes only contained a + * binary CID representation. + * + * @param {Uint8Array} bytes + * @returns {[CID, Uint8Array]} + */ + static decodeFirst (bytes) { + const specs = CID.inspectBytes(bytes) + const prefixSize = specs.size - specs.multihashSize + const multihashBytes = coerce(bytes.subarray(prefixSize, prefixSize + specs.multihashSize)) + if (multihashBytes.byteLength !== specs.multihashSize) { + throw new Error('Incorrect length') } + const digestBytes = multihashBytes.subarray(specs.multihashSize - specs.digestSize) + const digest = new Digest.Digest(specs.multihashCode, specs.digestSize, digestBytes, multihashBytes) + const cid = specs.version === 0 ? CID.createV0(digest) : CID.createV1(specs.codec, digest) + return [cid, bytes.subarray(specs.size)] + } + + /** + * Inspect the initial bytes of a CID to determine its properties. + * + * Involves decoding up to 4 varints. 
Typically this will require only 4 to 6 + * bytes but for larger multicodec code values and larger multihash digest + * lengths these varints can be quite large. It is recommended that at least + * 10 bytes be made available in the `initialBytes` argument for a complete + * inspection. + * + * @param {Uint8Array} initialBytes + * @returns {{ version:number, codec:number, multihashCode:number, digestSize:number, multihashSize:number, size:number }} + */ + static inspectBytes (initialBytes) { + let offset = 0 + const next = () => { + const [i, length] = varint.decode(initialBytes.subarray(offset)) + offset += length + return i + } + + let version = next() + let codec = DAG_PB_CODE + if (version === 18) { // CIDv0 + version = 0 + offset = 0 + } else if (version === 1) { + codec = next() + } else if (version !== 1) { + throw new RangeError(`Invalid CID version ${version}`) + } + + const prefixSize = offset + const multihashCode = next() // multihash code + const digestSize = next() // multihash length + const size = offset + digestSize + const multihashSize = size - prefixSize + + return { version, codec, multihashCode, digestSize, multihashSize, size } } /** diff --git a/test/fixtures/invalid-multihash.js b/test/fixtures/invalid-multihash.js index e5b93efb..82679cba 100644 --- a/test/fixtures/invalid-multihash.js +++ b/test/fixtures/invalid-multihash.js @@ -28,4 +28,9 @@ export default [{ size: 32, hex: '2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7', message: 'Incorrect length' +}, { + code: 0x12, + size: 32, + hex: '1220ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad0000', + message: 'Incorrect length' }] diff --git a/test/test-cid.js b/test/test-cid.js index 82c9521d..040605b8 100644 --- a/test/test-cid.js +++ b/test/test-cid.js @@ -2,7 +2,7 @@ import OLDCID from 'cids' import assert from 'assert' -import { toHex, equals } from '../src/bytes.js' +import { fromHex, toHex, equals } from '../src/bytes.js' import { varint, CID } from 
'multiformats' import { base58btc } from 'multiformats/bases/base58' import { base32 } from 'multiformats/bases/base32' @@ -10,13 +10,15 @@ import { base64 } from 'multiformats/bases/base64' import { sha256, sha512 } from 'multiformats/hashes/sha2' import util from 'util' import { Buffer } from 'buffer' +import invalidMultihash from './fixtures/invalid-multihash.js' + const test = it -const same = (x, y) => { - if (x instanceof Uint8Array && y instanceof Uint8Array) { - if (Buffer.compare(Buffer.from(x), Buffer.from(y)) === 0) return +const same = (actual, expected) => { + if (actual instanceof Uint8Array && expected instanceof Uint8Array) { + if (Buffer.compare(Buffer.from(actual), Buffer.from(expected)) === 0) return } - return assert.deepStrictEqual(x, y) + return assert.deepStrictEqual(actual, expected) } // eslint-disable-next-line no-unused-vars @@ -119,6 +121,35 @@ describe('CID', () => { const newCid = CID.asCID(oldCid) same(newCid.toString(), cidStr) }) + + test('inspect bytes', () => { + const byts = fromHex('1220ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad') + const inspected = CID.inspectBytes(byts.subarray(0, 10)) // should only need the first few bytes + same({ + version: 0, + codec: 0x70, + multihashCode: 0x12, + multihashSize: 34, + digestSize: 32, + size: 34 + }, inspected) + }) + + describe('decodeFirst', () => { + test('no remainder', () => { + const byts = fromHex('1220ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad') + const [cid, remainder] = CID.decodeFirst(byts) + same(cid.toString(), 'QmatYkNGZnELf8cAGdyJpUca2PyY4szai3RHyyWofNY1pY') + same(remainder.byteLength, 0) + }) + + test('remainder', () => { + const byts = fromHex('1220ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad0102030405') + const [cid, remainder] = CID.decodeFirst(byts) + same(cid.toString(), 'QmatYkNGZnELf8cAGdyJpUca2PyY4szai3RHyyWofNY1pY') + same(toHex(remainder), '0102030405') + }) + }) }) describe('v1', () => { @@ -282,6 
+313,13 @@ describe('CID', () => { const name = `CID.create(${version}, ${code}, ${mh})` test(name, async () => await testThrowAny(() => CID.create(version, code, hash))) } + + test('invalid fixtures', async () => { + for (const test of invalidMultihash) { + const buff = fromHex(`0171${test.hex}`) + assert.throws(() => CID.decode(buff), new RegExp(test.message)) + } + }) }) describe('idempotence', () => { @@ -482,6 +520,35 @@ describe('CID', () => { }) }) + test('inspect bytes', () => { + const byts = fromHex('01711220ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad') + const inspected = CID.inspectBytes(byts.subarray(0, 10)) // should only need the first few bytes + same({ + version: 1, + codec: 0x71, + multihashCode: 0x12, + multihashSize: 34, + digestSize: 32, + size: 36 + }, inspected) + + describe('decodeFirst', () => { + test('no remainder', () => { + const byts = fromHex('01711220ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad') + const [cid, remainder] = CID.decodeFirst(byts) + same(cid.toString(), 'bafyreif2pall7dybz7vecqka3zo24irdwabwdi4wc55jznaq75q7eaavvu') + same(remainder.byteLength, 0) + }) + + test('remainder', () => { + const byts = fromHex('01711220ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad0102030405') + const [cid, remainder] = CID.decodeFirst(byts) + same(cid.toString(), 'bafyreif2pall7dybz7vecqka3zo24irdwabwdi4wc55jznaq75q7eaavvu') + same(toHex(remainder), '0102030405') + }) + }) + }) + test('new CID from old CID', async () => { const hash = await sha256.digest(Buffer.from('abc')) const cid = CID.asCID(new OLDCID(1, 'raw', Buffer.from(hash.bytes))) @@ -527,6 +594,7 @@ describe('CID', () => { const encoded = varint.encodeTo(2, new Uint8Array(32)) await testThrow(() => CID.decode(encoded), 'Invalid CID version 2') }) + test('buffer', async () => { const hash = await sha256.digest(Buffer.from('abc')) const cid = CID.create(1, 112, hash)