Skip to content

Commit

Permalink
fix: Bucket.rmdir should delete files in batches
Browse files Browse the repository at this point in the history
  • Loading branch information
dominique-pfister committed Oct 3, 2024
1 parent 95e817a commit 5ae018e
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 35 deletions.
25 changes: 18 additions & 7 deletions packages/helix-shared-storage/src/storage.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ import processQueue from '@adobe/helix-shared-process-queue';
const gzip = promisify(zlib.gzip);
const gunzip = promisify(zlib.gunzip);

/**
* Maximum number of objects to delete in one operation.
*/
const MAX_DELETE_OBJECTS = 1000;

/**
* @typedef {import('@aws-sdk/client-s3').CommandInput} CommandInput
* @typedef {import('./storage.d').Bucket} BucketType
Expand Down Expand Up @@ -557,28 +562,34 @@ class Bucket {
}

async rmdir(src) {
const { log } = this;
const { bucket, log } = this;
src = sanitizeKey(src);
log.info(`fetching list of files to delete from ${this.bucket}/${src}`);
const items = await this.list(src);

// slice into chunks of MAX_DELETE_OBJECTS at most
const chunks = Array.from({
length: Math.ceil(items.length / MAX_DELETE_OBJECTS),
}, (v, i) => items.slice(i * MAX_DELETE_OBJECTS, i * MAX_DELETE_OBJECTS + MAX_DELETE_OBJECTS));

let oks = 0;
let errors = 0;
await processQueue(items, async (item) => {
const { key } = item;
log.info(`deleting ${this.bucket}/${key}`);
await processQueue(chunks, async (chunk) => {
log.info(`deleting ${chunk.length} from ${bucket}`);
const input = {
Bucket: this.bucket,
Key: key,
Delete: {
Objects: items.map((item) => ({ Key: item.key })),
},
};

try {
// delete on s3 and r2 (mirror) in parallel
await this.sendToS3andR2(DeleteObjectCommand, input);
await this.sendToS3andR2(DeleteObjectsCommand, input);
oks += 1;
} catch (e) {
// at least 1 cmd failed
log.warn(`error while deleting ${key}: ${e.$metadata.httpStatusCode}`);
log.warn(`error while deleting ${items.length} from ${bucket}: ${e.$metadata.httpStatusCode}`);
errors += 1;
}
}, 64);
Expand Down
68 changes: 40 additions & 28 deletions packages/helix-shared-storage/test/storage.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -868,25 +868,21 @@ describe('Storage test', () => {
nock('https://helix-code-bus.s3.fake.amazonaws.com')
.get('/?list-type=2&prefix=owner%2Frepo%2Fnew-branch%2F')
.reply(200, listReply.response)
.delete(/.*/)
.times(10)
.reply((uri) => {
deletes.s3.push(uri);
// reject first uri
if (deletes.s3.length === 1) {
return [404];
}
.post('/?delete=')
.reply(async (uri, body) => {
const xml = await xml2js.parseStringPromise(body);
xml.Delete.Object.forEach((o) => {
deletes.s3.push(o.Key[0]);
});
return [204];
});
nock(`https://helix-code-bus.${CLOUDFLARE_ACCOUNT_ID}.r2.cloudflarestorage.com`)
.delete(/.*/)
.times(10)
.reply((uri) => {
deletes.r2.push(uri);
// reject first uri
if (deletes.r2.length === 1) {
return [404];
}
.post('/?delete=')
.reply(async (uri, body) => {
const xml = await xml2js.parseStringPromise(body);
xml.Delete.Object.forEach((o) => {
deletes.r2.push(o.Key[0]);
});
return [204];
});

Expand All @@ -896,19 +892,35 @@ describe('Storage test', () => {
deletes.s3.sort();
deletes.r2.sort();
const expectedDeletes = [
'/owner/repo/ref/.circleci/config.yml?x-id=DeleteObject',
'/owner/repo/ref/.gitignore?x-id=DeleteObject',
'/owner/repo/ref/.vscode/launch.json?x-id=DeleteObject',
'/owner/repo/ref/.vscode/settings.json?x-id=DeleteObject',
'/owner/repo/ref/README.md?x-id=DeleteObject',
'/owner/repo/ref/helix_logo.png?x-id=DeleteObject',
'/owner/repo/ref/htdocs/favicon.ico?x-id=DeleteObject',
'/owner/repo/ref/htdocs/style.css?x-id=DeleteObject',
'/owner/repo/ref/index.md?x-id=DeleteObject',
'/owner/repo/ref/src/html.pre.js?x-id=DeleteObject',
'owner/repo/ref/.circleci/config.yml',
'owner/repo/ref/.gitignore',
'owner/repo/ref/.vscode/launch.json',
'owner/repo/ref/.vscode/settings.json',
'owner/repo/ref/README.md',
'owner/repo/ref/helix_logo.png',
'owner/repo/ref/htdocs/favicon.ico',
'owner/repo/ref/htdocs/style.css',
'owner/repo/ref/index.md',
'owner/repo/ref/src/html.pre.js',
];
assert.deepEqual(deletes.s3, expectedDeletes);
assert.deepEqual(deletes.r2, expectedDeletes);
assert.deepStrictEqual(deletes.s3, expectedDeletes);
assert.deepStrictEqual(deletes.r2, expectedDeletes);
});

it('delete objects can fail', async () => {
const listReply = JSON.parse(await fs.readFile(path.resolve(__testdir, 'fixtures', 'list-reply.json'), 'utf-8'));

nock('https://helix-code-bus.s3.fake.amazonaws.com')
.get('/?list-type=2&prefix=owner%2Frepo%2Fnew-branch%2F')
.reply(200, listReply.response)
.post('/?delete=')
.reply(404);
nock(`https://helix-code-bus.${CLOUDFLARE_ACCOUNT_ID}.r2.cloudflarestorage.com`)
.post('/?delete=')
.reply(404);

const bus = storage.codeBus();
await bus.rmdir('/owner/repo/new-branch/');
});

it('rmdir works for empty dir', async () => {
Expand Down

0 comments on commit 5ae018e

Please sign in to comment.