Skip to content

Commit

Permalink
fix: simplified vaults branch garbage collection
Browse files Browse the repository at this point in the history
[ci skip]
  • Loading branch information
tegefaulkes committed May 15, 2024
1 parent 77c83b8 commit a6a274a
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 112 deletions.
31 changes: 31 additions & 0 deletions src/git/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import type {
RequestType,
} from './types';
import type { EncryptedFS } from 'encryptedfs';
import path from 'path';
import git from 'isomorphic-git';
import { requestTypes } from './types';
import * as utils from '../utils';
Expand Down Expand Up @@ -230,6 +231,35 @@ async function listObjects({
return [...commits, ...trees, ...blobs, ...tags];
}

const objectsDirName = 'objects';
const excludedDirs = ['pack', 'info'];

/**
* Walks the filesystem to list out all git objects in the objects directory
*/
async function listObjectsAll({
fs,
gitDir,
}: {
fs: EncryptedFS;
gitDir: string;
}) {
const objectsDirPath = path.join(gitDir, objectsDirName);
const objectSet: Set<string> = new Set();
const objectDirs = await fs.promises.readdir(objectsDirPath);
for (const objectDir of objectDirs) {
if (typeof objectDir !== 'string') never();
if (excludedDirs.includes(objectDir)) continue;
const objectIds = await fs.promises.readdir(
path.join(objectsDirPath, objectDir),
);
for (const objectId of objectIds) {
objectSet.add(objectDir + objectId);
}
}
return [...objectSet];
}

/**
* Parses a want/has line from ref negotiation phase.
*/
Expand Down Expand Up @@ -309,6 +339,7 @@ export {
listReferencesGenerator,
referenceCapability,
listObjects,
listObjectsAll,
parseRequestLine,
isObjectId,
assertObjectId,
Expand Down
128 changes: 53 additions & 75 deletions src/vaults/VaultInternal.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import * as vaultsEvents from './events';
import { tagLast } from './types';
import * as ids from '../ids';
import * as nodesUtils from '../nodes/utils';
import * as gitUtils from '../git/utils';
import * as utils from '../utils';

type RemoteInfo = {
Expand Down Expand Up @@ -754,7 +755,7 @@ class VaultInternal {
// This ensures that any uncommitted state is dropped
await this.cleanWorkingDirectory();
// Do global GC operation
await this.garbageCollectGitObjects();
await this.garbageCollectGitObjectsGlobal();

// Setting dirty back to false
await tran.put(
Expand Down Expand Up @@ -998,27 +999,7 @@ class VaultInternal {
});
// We clean old history if a commit was made on previous version
if (headRef !== masterRef) {
// Delete old commits following chain from masterRef -> headRef
let currentRef = masterRef;
while (currentRef !== headRef) {
// Read commit info
const commit = await git.readCommit({
fs: this.efs,
dir: this.vaultDataDir,
gitdir: this.vaultGitDir,
oid: currentRef,
});
// Delete commit
await vaultsUtils.deleteObject(
this.efs,
this.vaultGitDir,
commit.oid,
);
// Getting new ref
const nextRef = commit.commit.parent.pop();
if (nextRef == null) break;
currentRef = nextRef;
}
await this.garbageCollectGitObjectsLocal(masterRef, headRef);
}
}
}
Expand Down Expand Up @@ -1064,69 +1045,66 @@ class VaultInternal {
}

/**
* Deletes any git objects that can't be reached from the canonicalBranch
* This will walk the current canonicalBranch history and delete any objects that are not a part of it.
* This is a dumb method since it will compare all objects to a walked path. There are better ways to do this.
*/
protected async garbageCollectGitObjects() {
// To garbage collect the git objects,
// we need to walk all objects connected to the master branch
// and delete the object files that are not touched by this walk
const touchedOids = {};

protected async garbageCollectGitObjectsGlobal() {
const objectIdsAll = await gitUtils.listObjectsAll({
fs: this.efs,
gitDir: this.vaultGitDir,
});
const objects = new Set(objectIdsAll);
const masterRef = await git.resolveRef({
fs: this.efs,
dir: this.vaultDataDir,
gitdir: this.vaultGitDir,
ref: vaultsUtils.canonicalBranch,
});
const queuedOids: string[] = [masterRef];
while (queuedOids.length > 0) {
const currentOid = queuedOids.shift()!;
if (touchedOids[currentOid] === null) continue;
const result = await git.readObject({
fs: this.efs,
dir: this.vaultDataDir,
gitdir: this.vaultGitDir,
oid: currentOid,
});
touchedOids[result.oid] = result.type;
if (result.format !== 'parsed') continue;
switch (result.type) {
case 'commit':
{
const object = result.object;
queuedOids.push(...object.parent);
queuedOids.push(object.tree);
}
break;
case 'tree':
{
const object = result.object;
for (const item of object) {
touchedOids[item.oid] = item.type;
}
}
break;
default: {
utils.never();
}
}
const reachableObjects = await gitUtils.listObjects({
fs: this.efs,
dir: this.vaultDataDir,
gitDir: this.vaultGitDir,
wants: [masterRef],
haves: [],
});
// Walk from head to all reachable objects
for (const objectReachable of reachableObjects) {
objects.delete(objectReachable);
}
// Walking all objects
const objectPath = path.join(this.vaultGitDir, 'objects');
const buckets = (await this.efs.readdir(objectPath)).filter((item) => {
return item !== 'info' && item !== 'pack';
// Any objects left in `objects` was unreachable, thus they are a part of orphaned branches
// So we want to delete them.
const deletePs: Array<Promise<void>> = [];
for (const objectId of objects) {
deletePs.push(
vaultsUtils.deleteObject(this.efs, this.vaultGitDir, objectId),
);
}
await Promise.all(deletePs);
}

/**
* This will walk from the `startId` to the `StopId` deleting objects as it goes.
* This is smarter since it only walks over the old history and not everything.
*/
protected async garbageCollectGitObjectsLocal(
startId: string,
stopId: string,
) {
const objects = await gitUtils.listObjects({
fs: this.efs,
dir: this.vaultDataDir,
gitDir: this.vaultGitDir,
wants: [startId],
haves: [stopId],
});
for (const bucket of buckets) {
const bucketPath = path.join(objectPath, bucket.toString());
const oids = await this.efs.readdir(bucketPath);
for (const shortOid of oids) {
const oidPath = path.join(bucketPath, shortOid.toString());
const oid = bucket.toString() + shortOid.toString();
if (touchedOids[oid] === undefined) {
// Removing unused objects
await this.efs.unlink(oidPath);
}
}
const deletePs: Array<Promise<void>> = [];
for (const objectId of objects) {
deletePs.push(
vaultsUtils.deleteObject(this.efs, this.vaultGitDir, objectId),
);
}
await Promise.all(deletePs);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/vaults/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ function commitAuthor(nodeId: NodeId): { name: string; email: string } {
}

async function* readDirRecursively(
fs: FileSystemReadable,
fs,
dir = '.',
): AsyncGenerator<string, void, void> {
const dirents = await fs.promises.readdir(dir);
Expand Down
3 changes: 2 additions & 1 deletion tests/git/http.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import git from 'isomorphic-git';
import { test } from '@fast-check/jest';
import fc from 'fast-check';
import * as gitHttp from '@/git/http';
import * as gitUtils from '@/git/utils';
import * as validationErrors from '@/validation/errors';
import * as gitTestUtils from './utils';

Expand Down Expand Up @@ -191,7 +192,7 @@ describe('Git Http', () => {
},
],
});
const objectIds = await gitTestUtils.listGitObjects(gitDirs);
const objectIds = await gitUtils.listObjectsAll(gitDirs);
const gen = gitHttp.generatePackData({
...gitDirs,
objectIds,
Expand Down
2 changes: 1 addition & 1 deletion tests/git/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ describe('Git utils', () => {
wants: commitIds,
haves: [],
});
const expectedObjectIds = await gitTestUtils.listGitObjects(gitDirs);
const expectedObjectIds = await gitUtils.listObjectsAll(gitDirs);
// Found objects should include all the commits
expect(objectList).toIncludeAllMembers(commitIds);
// Since it was an exhaustive walk of all commits, all objectIds should be included
Expand Down
34 changes: 0 additions & 34 deletions tests/git/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,39 +68,6 @@ async function createGitRepo({
}
}

const objectsDirName = 'objects';
const excludedDirs = ['pack', 'info'];

/**
* Walks the filesystem to list out all git objects in the objects directory
* @param fs
* @param gitDir
*/
async function listGitObjects({
efs,
gitDir,
}: {
efs: EncryptedFS;
gitDir: string;
}) {
const objectsDirPath = path.join(gitDir, objectsDirName);
const objectSet: Set<string> = new Set();
const objectDirs = await efs.promises.readdir(objectsDirPath);
for (const objectDir of objectDirs) {
if (typeof objectDir !== 'string') {
utils.never('objectDir should be a string');
}
if (excludedDirs.includes(objectDir)) continue;
const objectIds = await efs.promises.readdir(
path.join(objectsDirPath, objectDir),
);
for (const objectId of objectIds) {
objectSet.add(objectDir + objectId);
}
}
return [...objectSet];
}

type NegotiationTestData =
| {
type: 'want';
Expand Down Expand Up @@ -256,7 +223,6 @@ const gitRequestDataArb = fc.oneof(

export {
createGitRepo,
listGitObjects,
generateGitNegotiationLine,
request,
gitObjectIdArb,
Expand Down

0 comments on commit a6a274a

Please sign in to comment.