From 847d6c1ebf403e4f17e425ade83e94e7c455691a Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Thu, 19 Sep 2024 14:58:39 +0100 Subject: [PATCH 1/6] Revert "Fix the file deletion error by removing the deadlock (#2058)" This reverts commit f8e32015956fd2ab6ff3e192e72d94a6b9f60749. --- src/NexusMods.DataModel/NxFileStore.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/NexusMods.DataModel/NxFileStore.cs b/src/NexusMods.DataModel/NxFileStore.cs index f43bb6319..560d2da32 100644 --- a/src/NexusMods.DataModel/NxFileStore.cs +++ b/src/NexusMods.DataModel/NxFileStore.cs @@ -60,6 +60,7 @@ public NxFileStore( /// public ValueTask HaveFile(Hash hash) { + using var lck = _lock.ReadLock(); var db = _conn.Db; var archivedFiles = ArchivedFile.FindByHash(db, hash).Any(x => x.IsValid()); return ValueTask.FromResult(archivedFiles); From e831653bba94bc3c0af27f8ec5c30784bb4d3e3c Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Thu, 19 Sep 2024 15:08:09 +0100 Subject: [PATCH 2/6] WIP Commit: Don't allow yielding to potential non-GC code from within GC. --- .../RunGarbageCollector.cs | 35 ++++++++++++++----- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs b/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs index ae38a5452..535cd4aa3 100644 --- a/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs +++ b/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs @@ -1,7 +1,9 @@ using NexusMods.Abstractions.IO; using NexusMods.Abstractions.Settings; using NexusMods.App.GarbageCollection.Nx; +using NexusMods.Hashing.xxHash64; using NexusMods.MnemonicDB.Abstractions; +using NexusMods.Paths; namespace NexusMods.App.GarbageCollection.DataModel; /// @@ -10,25 +12,40 @@ namespace NexusMods.App.GarbageCollection.DataModel; /// public static class RunGarbageCollector { + private static AsyncFriendlyReaderWriterLock _gcLock = new(); + /// /// The archive locations, usually obtained from 'DataModelSettings'. /// The that requires locking for concurrent access. /// The MneumonicDB to the DataModel. public static void Do(Span archiveLocations, IFileStore store, IConnection connection) { - using var lck = store.Lock(); + using var gcLock = _gcLock.WriteLock(); + var toUpdateInDataStore = new List(); + + using (store.Lock()) + { + var gc = new ArchiveGarbageCollector(); + DataStoreNxArchiveFinder.FindAllArchives(archiveLocations, gc); + DataStoreReferenceMarker.MarkUsedFiles(connection, gc); + gc.CollectGarbage(new Progress(), (progress, toArchive, toRemove, archive) => + { + NxRepacker.RepackArchive(progress, toArchive, toRemove, archive, false, out var newArchivePath); + toUpdateInDataStore.Add(new ToUpdateInDataStoreEntry(toRemove, archive.FilePath, newArchivePath)); + }); + } + var updater = new NxFileStoreUpdater(connection); - var gc = new ArchiveGarbageCollector(); - DataStoreNxArchiveFinder.FindAllArchives(archiveLocations, gc); - DataStoreReferenceMarker.MarkUsedFiles(connection, gc); - gc.CollectGarbage(new Progress(), (progress, toArchive, toRemove, archive) => + foreach (var entry in toUpdateInDataStore) { - NxRepacker.RepackArchive(progress, toArchive, toRemove, archive, false, out var newArchivePath); - updater.UpdateNxFileStore(toRemove, archive.FilePath, newArchivePath); + updater.UpdateNxFileStore(entry.ToRemove, entry.OldFilePath, entry.NewFilePath); // Delete original archive. We do this in a delayed fashion such that // a power loss during the UpdateNxFileStore operation does not lead // to an inconsistent state - archive.FilePath.Delete(); - }); + entry.OldFilePath.Delete(); + } + } + + private record struct ToUpdateInDataStoreEntry(List ToRemove, AbsolutePath OldFilePath, AbsolutePath NewFilePath); } From 880a1a72f16afd8810218c81091fbbc230f455f6 Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Thu, 19 Sep 2024 15:13:00 +0100 Subject: [PATCH 3/6] Added: Explanation for previous commit --- .../RunGarbageCollector.cs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs b/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs index 535cd4aa3..774d1e7ff 100644 --- a/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs +++ b/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs @@ -20,6 +20,7 @@ public static class RunGarbageCollector /// The MneumonicDB to the DataModel. public static void Do(Span archiveLocations, IFileStore store, IConnection connection) { + // See 'SAFETY' comment below for explanation of 'gcLock' using var gcLock = _gcLock.WriteLock(); var toUpdateInDataStore = new List(); @@ -35,6 +36,12 @@ public static void Do(Span archiveLocations, IFileStore store, }); } + // SAFETY: Updating the FileStore interacts with external non-GC components, + // such as MnemonicDB. This may cause us to yield to external code + // that could touch the FileStore lock. To avoid deadlocks, we should + // prevent this from happening if possible. + // + // This is why we release `store.Lock()` early. var updater = new NxFileStoreUpdater(connection); foreach (var entry in toUpdateInDataStore) { @@ -44,7 +51,6 @@ public static void Do(Span archiveLocations, IFileStore store, // to an inconsistent state entry.OldFilePath.Delete(); } - } private record struct ToUpdateInDataStoreEntry(List ToRemove, AbsolutePath OldFilePath, AbsolutePath NewFilePath); From f531dbd4f1a7c2849e0446ace1bced862f73bb0c Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Thu, 19 Sep 2024 15:17:34 +0100 Subject: [PATCH 4/6] Changed: _gcLock should be read-only. --- .../RunGarbageCollector.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs b/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs index 774d1e7ff..baae07196 100644 --- a/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs +++ b/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs @@ -12,7 +12,7 @@ namespace NexusMods.App.GarbageCollection.DataModel; /// public static class RunGarbageCollector { - private static AsyncFriendlyReaderWriterLock _gcLock = new(); + private static readonly AsyncFriendlyReaderWriterLock _gcLock = new(); /// /// The archive locations, usually obtained from 'DataModelSettings'. From 0e2140230bae212c1ff3e4621f39ba483d5d3c4f Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Thu, 19 Sep 2024 15:37:05 +0100 Subject: [PATCH 5/6] Added: Extra note to the PR --- .../RunGarbageCollector.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs b/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs index baae07196..34fa0092b 100644 --- a/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs +++ b/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs @@ -42,6 +42,10 @@ public static void Do(Span archiveLocations, IFileStore store, // prevent this from happening if possible. // // This is why we release `store.Lock()` early. + + // NOTE: In theory UpdateNxFileStore can call GC back again. This is unlikely to happen + // however for the time being; because we only run GC when deleting a library item + // or loadout. No callback should do that. Long term we want to prevent re-entrancy. var updater = new NxFileStoreUpdater(connection); foreach (var entry in toUpdateInDataStore) { From 0c088a43963782724cabe8c13d131c74d3085abf Mon Sep 17 00:00:00 2001 From: Sewer56 Date: Thu, 19 Sep 2024 15:42:31 +0100 Subject: [PATCH 6/6] Added: Additional Note w.r.t. needing a non-blocking Commit --- .../RunGarbageCollector.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs b/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs index 34fa0092b..188be0edb 100644 --- a/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs +++ b/src/NexusMods.App.GarbageCollection.DataModel/RunGarbageCollector.cs @@ -46,6 +46,11 @@ public static void Do(Span archiveLocations, IFileStore store, // NOTE: In theory UpdateNxFileStore can call GC back again. This is unlikely to happen // however for the time being; because we only run GC when deleting a library item // or loadout. No callback should do that. Long term we want to prevent re-entrancy. + // + // Running arbitrary code in GC in any system is however prone to possible failure, + // so long term we will want to avoid UpdateNxFileStore (MnemonicDB Commit) to avoid + // yielding to external code. We need a non-blocking `Commit`; that + // sends stuff off to another thread or internal queue without blocking. var updater = new NxFileStoreUpdater(connection); foreach (var entry in toUpdateInDataStore) {