diff --git a/src/git/utils.ts b/src/git/utils.ts index 8323f4a1fc..24fa04c6b6 100644 --- a/src/git/utils.ts +++ b/src/git/utils.ts @@ -8,6 +8,7 @@ import type { RequestType, } from './types'; import type { EncryptedFS } from 'encryptedfs'; +import path from 'path'; import git from 'isomorphic-git'; import { requestTypes } from './types'; import * as utils from '../utils'; @@ -230,6 +231,35 @@ async function listObjects({ return [...commits, ...trees, ...blobs, ...tags]; } +const objectsDirName = 'objects'; +const excludedDirs = ['pack', 'info']; + +/** + * Walks the filesystem to list out all git objects in the objects directory + */ +async function listObjectsAll({ + fs, + gitDir, +}: { + fs: EncryptedFS; + gitDir: string; +}) { + const objectsDirPath = path.join(gitDir, objectsDirName); + const objectSet: Set = new Set(); + const objectDirs = await fs.promises.readdir(objectsDirPath); + for (const objectDir of objectDirs) { + if (typeof objectDir !== 'string') never(); + if (excludedDirs.includes(objectDir)) continue; + const objectIds = await fs.promises.readdir( + path.join(objectsDirPath, objectDir), + ); + for (const objectId of objectIds) { + objectSet.add(objectDir + objectId); + } + } + return [...objectSet]; +} + /** * Parses a want/has line from ref negotiation phase. */ @@ -309,6 +339,7 @@ export { listReferencesGenerator, referenceCapability, listObjects, + listObjectsAll, parseRequestLine, isObjectId, assertObjectId, diff --git a/src/vaults/VaultInternal.ts b/src/vaults/VaultInternal.ts index 2575144cc9..eea2fa0401 100644 --- a/src/vaults/VaultInternal.ts +++ b/src/vaults/VaultInternal.ts @@ -33,6 +33,7 @@ import * as vaultsEvents from './events'; import { tagLast } from './types'; import * as ids from '../ids'; import * as nodesUtils from '../nodes/utils'; +import * as gitUtils from '../git/utils'; import * as utils from '../utils'; type RemoteInfo = { @@ -754,7 +755,7 @@ class VaultInternal { // This ensures that any uncommitted state is dropped await this.cleanWorkingDirectory(); // Do global GC operation - await this.garbageCollectGitObjects(); + await this.garbageCollectGitObjectsGlobal(); // Setting dirty back to false await tran.put( @@ -998,27 +999,7 @@ class VaultInternal { }); // We clean old history if a commit was made on previous version if (headRef !== masterRef) { - // Delete old commits following chain from masterRef -> headRef - let currentRef = masterRef; - while (currentRef !== headRef) { - // Read commit info - const commit = await git.readCommit({ - fs: this.efs, - dir: this.vaultDataDir, - gitdir: this.vaultGitDir, - oid: currentRef, - }); - // Delete commit - await vaultsUtils.deleteObject( - this.efs, - this.vaultGitDir, - commit.oid, - ); - // Getting new ref - const nextRef = commit.commit.parent.pop(); - if (nextRef == null) break; - currentRef = nextRef; - } + await this.garbageCollectGitObjectsLocal(masterRef, headRef); } } } @@ -1064,69 +1045,66 @@ class VaultInternal { } /** - * Deletes any git objects that can't be reached from the canonicalBranch + * This will walk the current canonicalBranch history and delete any objects that are not a part of it. + * This is a dumb method since it will compare all objects to a walked path. There are better ways to do this. */ - protected async garbageCollectGitObjects() { - // To garbage collect the git objects, - // we need to walk all objects connected to the master branch - // and delete the object files that are not touched by this walk - const touchedOids = {}; + + protected async garbageCollectGitObjectsGlobal() { + const objectIdsAll = await gitUtils.listObjectsAll({ + fs: this.efs, + gitDir: this.vaultGitDir, + }); + const objects = new Set(objectIdsAll); const masterRef = await git.resolveRef({ fs: this.efs, dir: this.vaultDataDir, gitdir: this.vaultGitDir, ref: vaultsUtils.canonicalBranch, }); - const queuedOids: string[] = [masterRef]; - while (queuedOids.length > 0) { - const currentOid = queuedOids.shift()!; - if (touchedOids[currentOid] === null) continue; - const result = await git.readObject({ - fs: this.efs, - dir: this.vaultDataDir, - gitdir: this.vaultGitDir, - oid: currentOid, - }); - touchedOids[result.oid] = result.type; - if (result.format !== 'parsed') continue; - switch (result.type) { - case 'commit': - { - const object = result.object; - queuedOids.push(...object.parent); - queuedOids.push(object.tree); - } - break; - case 'tree': - { - const object = result.object; - for (const item of object) { - touchedOids[item.oid] = item.type; - } - } - break; - default: { - utils.never(); - } - } + const reachableObjects = await gitUtils.listObjects({ + fs: this.efs, + dir: this.vaultDataDir, + gitDir: this.vaultGitDir, + wants: [masterRef], + haves: [], + }); + // Walk from head to all reachable objects + for (const objectReachable of reachableObjects) { + objects.delete(objectReachable); } - // Walking all objects - const objectPath = path.join(this.vaultGitDir, 'objects'); - const buckets = (await this.efs.readdir(objectPath)).filter((item) => { - return item !== 'info' && item !== 'pack'; + // Any objects left in `objects` was unreachable, thus they are a part of orphaned branches + // So we want to delete them. + const deletePs: Array> = []; + for (const objectId of objects) { + deletePs.push( + vaultsUtils.deleteObject(this.efs, this.vaultGitDir, objectId), + ); + } + await Promise.all(deletePs); + } + + /** + * This will walk from the `startId` to the `StopId` deleting objects as it goes. + * This is smarter since it only walks over the old history and not everything. + */ + protected async garbageCollectGitObjectsLocal( + startId: string, + stopId: string, + ) { + const objects = await gitUtils.listObjects({ + fs: this.efs, + dir: this.vaultDataDir, + gitDir: this.vaultGitDir, + wants: [startId], + haves: [stopId], }); - for (const bucket of buckets) { - const bucketPath = path.join(objectPath, bucket.toString()); - const oids = await this.efs.readdir(bucketPath); - for (const shortOid of oids) { - const oidPath = path.join(bucketPath, shortOid.toString()); - const oid = bucket.toString() + shortOid.toString(); - if (touchedOids[oid] === undefined) { - // Removing unused objects - await this.efs.unlink(oidPath); - } - } + const deletePs: Array> = []; + for (const objectId of objects) { + deletePs.push( + vaultsUtils.deleteObject(this.efs, this.vaultGitDir, objectId), + ); } + await Promise.all(deletePs); } } diff --git a/src/vaults/utils.ts b/src/vaults/utils.ts index 53397c8f89..c022763830 100644 --- a/src/vaults/utils.ts +++ b/src/vaults/utils.ts @@ -44,7 +44,7 @@ function commitAuthor(nodeId: NodeId): { name: string; email: string } { } async function* readDirRecursively( - fs: FileSystemReadable, + fs, dir = '.', ): AsyncGenerator { const dirents = await fs.promises.readdir(dir); diff --git a/tests/git/http.test.ts b/tests/git/http.test.ts index 898a4ab638..702292f7b4 100644 --- a/tests/git/http.test.ts +++ b/tests/git/http.test.ts @@ -5,6 +5,7 @@ import git from 'isomorphic-git'; import { test } from '@fast-check/jest'; import fc from 'fast-check'; import * as gitHttp from '@/git/http'; +import * as gitUtils from '@/git/utils'; import * as validationErrors from '@/validation/errors'; import * as gitTestUtils from './utils'; @@ -191,7 +192,7 @@ describe('Git Http', () => { }, ], }); - const objectIds = await gitTestUtils.listGitObjects(gitDirs); + const objectIds = await gitUtils.listObjectsAll(gitDirs); const gen = gitHttp.generatePackData({ ...gitDirs, objectIds, diff --git a/tests/git/utils.test.ts b/tests/git/utils.test.ts index d09bdd1e57..f981cb2382 100644 --- a/tests/git/utils.test.ts +++ b/tests/git/utils.test.ts @@ -183,7 +183,7 @@ describe('Git utils', () => { wants: commitIds, haves: [], }); - const expectedObjectIds = await gitTestUtils.listGitObjects(gitDirs); + const expectedObjectIds = await gitUtils.listObjectsAll(gitDirs); // Found objects should include all the commits expect(objectList).toIncludeAllMembers(commitIds); // Since it was an exhaustive walk of all commits, all objectIds should be included diff --git a/tests/git/utils.ts b/tests/git/utils.ts index dbcafaafbc..8652133c3e 100644 --- a/tests/git/utils.ts +++ b/tests/git/utils.ts @@ -68,39 +68,6 @@ async function createGitRepo({ } } -const objectsDirName = 'objects'; -const excludedDirs = ['pack', 'info']; - -/** - * Walks the filesystem to list out all git objects in the objects directory - * @param fs - * @param gitDir - */ -async function listGitObjects({ - efs, - gitDir, -}: { - efs: EncryptedFS; - gitDir: string; -}) { - const objectsDirPath = path.join(gitDir, objectsDirName); - const objectSet: Set = new Set(); - const objectDirs = await efs.promises.readdir(objectsDirPath); - for (const objectDir of objectDirs) { - if (typeof objectDir !== 'string') { - utils.never('objectDir should be a string'); - } - if (excludedDirs.includes(objectDir)) continue; - const objectIds = await efs.promises.readdir( - path.join(objectsDirPath, objectDir), - ); - for (const objectId of objectIds) { - objectSet.add(objectDir + objectId); - } - } - return [...objectSet]; -} - type NegotiationTestData = | { type: 'want'; @@ -256,7 +223,6 @@ const gitRequestDataArb = fc.oneof( export { createGitRepo, - listGitObjects, generateGitNegotiationLine, request, gitObjectIdArb,