From 1203d40d01181649829aa9fe13dac294d16d3d19 Mon Sep 17 00:00:00 2001 From: Vasco Santos Date: Tue, 28 Feb 2023 11:55:15 +0100 Subject: [PATCH 1/2] feat: set max number of car cids to resolve --- src/bindings.d.ts | 1 + src/middleware.js | 7 +++++++ test/index.spec.js | 10 ++++++++++ wrangler.toml | 7 +++++++ 4 files changed, 25 insertions(+) diff --git a/src/bindings.d.ts b/src/bindings.d.ts index 3744e50..6b91f06 100644 --- a/src/bindings.d.ts +++ b/src/bindings.d.ts @@ -9,6 +9,7 @@ export interface Environment { CARPARK: R2Bucket DUDEWHERE: R2Bucket SATNAV: R2Bucket + MAX_CAR_CIDS_TO_RESOLVE: string } export interface CarCidsContext extends Context { diff --git a/src/middleware.js b/src/middleware.js index a9b91a5..e2a0012 100644 --- a/src/middleware.js +++ b/src/middleware.js @@ -72,6 +72,13 @@ export function withCarCids (handler) { throw new HttpError('missing origin CAR CID(s)', { status: 400 }) } + // Cloudflare currently sets a limit of 1000 sub-requests within the worker context + // If we have a given root CID splitted across hundreds of CARs, freeway will hit + // the sub-requests limit and not serve content anyway + if (carCids.length > Number(env.MAX_CAR_CIDS_TO_RESOLVE)) { + throw new HttpError('number CAR CIDs is too large to resolve', { status: 501 }) + } + return handler(request, env, { ...ctx, carCids }) } } diff --git a/test/index.spec.js b/test/index.spec.js index be5f141..576e7ac 100644 --- a/test/index.spec.js +++ b/test/index.spec.js @@ -68,4 +68,14 @@ describe('freeway', () => { const output = new Uint8Array(await res.arrayBuffer()) assert(equals(input[0].content, output)) }) + + it('should fail when divided into more than 120 CAR files', async () => { + const input = [{ path: 'sargo.tar.xz', content: randomBytes(1218523560) }] + const { dataCid } = await builder.add(input) + + const res = await miniflare.dispatchFetch(`http://localhost:8787/ipfs/${dataCid}/${input[0].path}`) + + assert(!res.ok) + assert.equal(res.status, 501) + }) }) diff --git a/wrangler.toml b/wrangler.toml index 4bbeaa7..499d729 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -27,6 +27,9 @@ r2_buckets = [ [env.production.build] command = "npm run build" +[env.production.vars] +MAX_CAR_CIDS_TO_RESOLVE = "250" + # Staging! [env.staging] account_id = "fffa4b4363a7e5250af8357087263b3a" @@ -40,6 +43,9 @@ r2_buckets = [ [env.staging.build] command = "npm run build" +[env.staging.vars] +MAX_CAR_CIDS_TO_RESOLVE = "250" + # Test! [env.test] workers_dev = true @@ -51,6 +57,7 @@ r2_buckets = [ [env.test.vars] DEBUG = "true" +MAX_CAR_CIDS_TO_RESOLVE = "120" [env.alanshaw] workers_dev = true From 9d4c9ffa0e006b8cd8085438965c78c4b82769ac Mon Sep 17 00:00:00 2001 From: Vasco Santos Date: Wed, 1 Mar 2023 16:16:07 +0100 Subject: [PATCH 2/2] chore: apply suggestions from code review Co-authored-by: Alan Shaw --- package-lock.json | 2 +- src/bindings.d.ts | 2 +- src/middleware.js | 21 ++++++++++++++------- wrangler.toml | 6 +++--- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/package-lock.json b/package-lock.json index 54febb3..1370b0c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,7 +14,7 @@ "@ipld/dag-json": "^8.0.11", "@ipld/dag-pb": "^2.1.18", "@web3-storage/fast-unixfs-exporter": "^0.2.0", - "@web3-storage/gateway-lib": "^2.0.1", + "@web3-storage/gateway-lib": "^2.0.2", "cardex": "^1.0.0", "chardet": "^1.5.0", "dagula": "^4.1.1", diff --git a/src/bindings.d.ts b/src/bindings.d.ts index 6b91f06..2bd199a 100644 --- a/src/bindings.d.ts +++ b/src/bindings.d.ts @@ -9,7 +9,7 @@ export interface Environment { CARPARK: R2Bucket DUDEWHERE: R2Bucket SATNAV: R2Bucket - MAX_CAR_CIDS_TO_RESOLVE: string + MAX_SHARDS: string } export interface CarCidsContext extends Context { diff --git a/src/middleware.js b/src/middleware.js index e2a0012..8cbf5ad 100644 --- a/src/middleware.js +++ b/src/middleware.js @@ -40,6 +40,11 @@ export function withCarCids (handler) { if (!ctx.dataCid) throw new Error('missing data CID') if (!ctx.searchParams) throw new Error('missing URL search params') + // Cloudflare currently sets a limit of 1000 sub-requests within the worker context + // If we have a given root CID splitted across hundreds of CARs, freeway will hit + // the sub-requests limit and not serve content anyway + const maxShards = env.MAX_SHARDS ? parseInt(env.MAX_SHARDS) : 250 + const carCids = ctx.searchParams.getAll('origin').flatMap(str => { return str.split(',') .reduce((/** @type {import('multiformats').CID[]} */cids, str) => { @@ -62,23 +67,25 @@ export function withCarCids (handler) { const results = await env.DUDEWHERE.list({ prefix: `${ctx.dataCid}/`, cursor }) if (!results || !results.objects.length) break carCids.push(...results.objects.map(o => parseCid(o.key.split('/')[1]))) + + if (carCids.length > maxShards) { + throw new HttpError('request exceeds maximum DAG shards', { status: 501 }) + } + if (!results.truncated) break cursor = results.cursor } console.log(`dude where's my CAR? ${ctx.dataCid} => ${carCids}`) + } else { + if (carCids.length > maxShards) { + throw new HttpError('request exceeds maximum DAG shards', { status: 501 }) + } } if (!carCids.length) { throw new HttpError('missing origin CAR CID(s)', { status: 400 }) } - // Cloudflare currently sets a limit of 1000 sub-requests within the worker context - // If we have a given root CID splitted across hundreds of CARs, freeway will hit - // the sub-requests limit and not serve content anyway - if (carCids.length > Number(env.MAX_CAR_CIDS_TO_RESOLVE)) { - throw new HttpError('number CAR CIDs is too large to resolve', { status: 501 }) - } - return handler(request, env, { ...ctx, carCids }) } } diff --git a/wrangler.toml b/wrangler.toml index 499d729..afbbbde 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -28,7 +28,7 @@ r2_buckets = [ command = "npm run build" [env.production.vars] -MAX_CAR_CIDS_TO_RESOLVE = "250" +MAX_SHARDS = "250" # Staging! [env.staging] @@ -44,7 +44,7 @@ r2_buckets = [ command = "npm run build" [env.staging.vars] -MAX_CAR_CIDS_TO_RESOLVE = "250" +MAX_SHARDS = "250" # Test! [env.test] @@ -57,7 +57,7 @@ r2_buckets = [ [env.test.vars] DEBUG = "true" -MAX_CAR_CIDS_TO_RESOLVE = "120" +MAX_SHARDS = "120" [env.alanshaw] workers_dev = true