diff --git a/package-lock.json b/package-lock.json index 54febb3..1370b0c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,7 +14,7 @@ "@ipld/dag-json": "^8.0.11", "@ipld/dag-pb": "^2.1.18", "@web3-storage/fast-unixfs-exporter": "^0.2.0", - "@web3-storage/gateway-lib": "^2.0.1", + "@web3-storage/gateway-lib": "^2.0.2", "cardex": "^1.0.0", "chardet": "^1.5.0", "dagula": "^4.1.1", diff --git a/src/bindings.d.ts b/src/bindings.d.ts index 3744e50..2bd199a 100644 --- a/src/bindings.d.ts +++ b/src/bindings.d.ts @@ -9,6 +9,7 @@ export interface Environment { CARPARK: R2Bucket DUDEWHERE: R2Bucket SATNAV: R2Bucket + MAX_SHARDS: string } export interface CarCidsContext extends Context { diff --git a/src/middleware.js b/src/middleware.js index a9b91a5..8cbf5ad 100644 --- a/src/middleware.js +++ b/src/middleware.js @@ -40,6 +40,11 @@ export function withCarCids (handler) { if (!ctx.dataCid) throw new Error('missing data CID') if (!ctx.searchParams) throw new Error('missing URL search params') + // Cloudflare currently sets a limit of 1000 sub-requests within the worker context + // If we have a given root CID split across hundreds of CARs, freeway will hit + // the sub-requests limit and not serve content anyway + const maxShards = env.MAX_SHARDS ? parseInt(env.MAX_SHARDS) : 250 + const carCids = ctx.searchParams.getAll('origin').flatMap(str => { return str.split(',') .reduce((/** @type {import('multiformats').CID[]} */cids, str) => { @@ -62,10 +67,19 @@ const results = await env.DUDEWHERE.list({ prefix: `${ctx.dataCid}/`, cursor }) if (!results || !results.objects.length) break carCids.push(...results.objects.map(o => parseCid(o.key.split('/')[1]))) + + if (carCids.length > maxShards) { + throw new HttpError('request exceeds maximum DAG shards', { status: 501 }) + } + if (!results.truncated) break cursor = results.cursor } console.log(`dude where's my CAR? 
${ctx.dataCid} => ${carCids}`) + } else { + if (carCids.length > maxShards) { + throw new HttpError('request exceeds maximum DAG shards', { status: 501 }) + } } if (!carCids.length) { diff --git a/test/index.spec.js b/test/index.spec.js index be5f141..576e7ac 100644 --- a/test/index.spec.js +++ b/test/index.spec.js @@ -68,4 +68,14 @@ describe('freeway', () => { const output = new Uint8Array(await res.arrayBuffer()) assert(equals(input[0].content, output)) }) + + it('should fail when divided into more than 120 CAR files', async () => { + const input = [{ path: 'sargo.tar.xz', content: randomBytes(1218523560) }] + const { dataCid } = await builder.add(input) + + const res = await miniflare.dispatchFetch(`http://localhost:8787/ipfs/${dataCid}/${input[0].path}`) + + assert(!res.ok) + assert.equal(res.status, 501) + }) }) diff --git a/wrangler.toml b/wrangler.toml index 4bbeaa7..afbbbde 100644 --- a/wrangler.toml +++ b/wrangler.toml @@ -27,6 +27,9 @@ r2_buckets = [ [env.production.build] command = "npm run build" +[env.production.vars] +MAX_SHARDS = "250" + # Staging! [env.staging] account_id = "fffa4b4363a7e5250af8357087263b3a" @@ -40,6 +43,9 @@ r2_buckets = [ [env.staging.build] command = "npm run build" +[env.staging.vars] +MAX_SHARDS = "250" + # Test! [env.test] workers_dev = true @@ -51,6 +57,7 @@ r2_buckets = [ [env.test.vars] DEBUG = "true" +MAX_SHARDS = "120" [env.alanshaw] workers_dev = true