Skip to content

Commit cb2bd38

Browse files
[CAS] Cleanup chaining for UnifiedOnDiskCache
Previously, the chaining of KeyValueDB and OnDiskGraphDB was not consistent. Some operations were implemented directly in the lowest layer, some in the UnifiedOnDiskCache layer, and some in the ActionCache/ObjectStore layer. This change moves all of the chaining logic down into the OnDiskGraphDB and OnDiskKeyValueDB layers, with the exception that KeyValueDB chaining still needs the help of functions in the UnifiedOnDiskCache layer. This cleans up the interface of UnifiedOnDiskCache so that its member functions only cover database management. Old functions like `KVPut/Get` can be done directly via the underlying database with a small extra wrapper around it (see the libCASPluginTest.dylib implementation for the simple wrapper needed).
1 parent efa5592 commit cb2bd38

15 files changed

+315
-272
lines changed

llvm/include/llvm/CAS/BuiltinUnifiedCASDatabases.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===- BuiltinUnifiedCASDatabases.h -----------------------------*- C++ -*-===//
1+
//===----------------------------------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.

llvm/include/llvm/CAS/ObjectStore.h

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file
10+
/// This file contains the declaration of the ObjectStore class.
11+
///
12+
//===----------------------------------------------------------------------===//
813

914
#ifndef LLVM_CAS_OBJECTSTORE_H
1015
#define LLVM_CAS_OBJECTSTORE_H
@@ -246,7 +251,7 @@ class ObjectStore {
246251
/// Set the size for limiting growth of on-disk storage. This has an effect
247252
/// for when the instance is closed.
248253
///
249-
/// Implementations may be not have this implemented.
254+
/// Implementations may leave this unimplemented.
250255
virtual Error setSizeLimit(std::optional<uint64_t> SizeLimit) {
251256
return Error::success();
252257
}
@@ -262,7 +267,7 @@ class ObjectStore {
262267
/// Prune local storage to reduce its size according to the desired size
263268
/// limit. Pruning can happen concurrently with other operations.
264269
///
265-
/// Implementations may be not have this implemented.
270+
/// Implementations may leave this unimplemented.
266271
virtual Error pruneStorageData() { return Error::success(); }
267272

268273
/// Validate the whole node tree.
@@ -291,13 +296,9 @@ class ObjectStore {
291296
/// Reference to an abstract hierarchical node, with data and references.
292297
/// Reference is passed by value and is expected to be valid as long as the \a
293298
/// ObjectStore is.
294-
///
295-
/// TODO: Expose \a ObjectStore::readData() and only call \a
296-
/// ObjectStore::getDataString() when asked.
297299
class ObjectProxy {
298300
public:
299-
const ObjectStore &getCAS() const { return *CAS; }
300-
ObjectStore &getCAS() { return *CAS; }
301+
ObjectStore &getCAS() const { return *CAS; }
301302
CASID getID() const { return CAS->getID(Ref); }
302303
ObjectRef getRef() const { return Ref; }
303304
size_t getNumReferences() const { return CAS->getNumRefs(H); }
@@ -352,12 +353,13 @@ class ObjectProxy {
352353
ObjectHandle H;
353354
};
354355

356+
/// Create an in memory CAS.
355357
std::unique_ptr<ObjectStore> createInMemoryCAS();
356358

357359
/// \returns true if \c LLVM_ENABLE_ONDISK_CAS configuration was enabled.
358360
bool isOnDiskCASEnabled();
359361

360-
/// Gets or creates a persistent on-disk path at \p Path.
362+
/// Create a persistent on-disk CAS at \p Path.
361363
Expected<std::unique_ptr<ObjectStore>> createOnDiskCAS(const Twine &Path);
362364

363365
/// Set \p Path to a reasonable default on-disk path for a persistent CAS for

llvm/include/llvm/CAS/OnDiskGraphDB.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -341,13 +341,16 @@ class OnDiskGraphDB {
341341
/// \param HashByteSize Size for the object digest hash bytes.
342342
/// \param UpstreamDB Optional on-disk store to be used for faulting-in nodes
343343
/// if they don't exist in the primary store. The upstream store is only used
344-
/// for reading nodes, new nodes are only written to the primary store.
344+
/// for reading nodes; new nodes are only written to the primary store. The
345+
/// caller must ensure \p UpstreamDB outlives this instance of
346+
/// OnDiskGraphDB; the common usage is to have a \p UnifiedOnDiskCache
347+
/// manage both.
345348
/// \param Policy If \p UpstreamDB is provided, controls how nodes are copied
346349
/// to primary store. This is recorded at creation time and subsequent opens
347350
/// need to pass the same policy otherwise the \p open will fail.
348351
static Expected<std::unique_ptr<OnDiskGraphDB>>
349352
open(StringRef Path, StringRef HashName, unsigned HashByteSize,
350-
std::unique_ptr<OnDiskGraphDB> UpstreamDB = nullptr,
353+
OnDiskGraphDB *UpstreamDB = nullptr,
351354
std::shared_ptr<OnDiskCASLogger> Logger = nullptr,
352355
FaultInPolicy Policy = FaultInPolicy::FullTree);
353356

@@ -440,9 +443,8 @@ class OnDiskGraphDB {
440443

441444
// Private constructor.
442445
OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
443-
OnDiskDataAllocator DataPool,
444-
std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy,
445-
std::shared_ptr<OnDiskCASLogger> Logger);
446+
OnDiskDataAllocator DataPool, OnDiskGraphDB *UpstreamDB,
447+
FaultInPolicy Policy, std::shared_ptr<OnDiskCASLogger> Logger);
446448

447449
/// Mapping from hash to object reference.
448450
///
@@ -461,7 +463,7 @@ class OnDiskGraphDB {
461463
std::string RootPath;
462464

463465
/// Optional on-disk store to be used for faulting-in nodes.
464-
std::unique_ptr<OnDiskGraphDB> UpstreamDB;
466+
OnDiskGraphDB* UpstreamDB = nullptr;
465467

466468
/// The policy used to fault in data from upstream.
467469
FaultInPolicy FIPolicy;

llvm/include/llvm/CAS/OnDiskKeyValueDB.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
namespace llvm::cas::ondisk {
2121

22+
class UnifiedOnDiskCache;
23+
2224
/// An on-disk key-value data store with the following properties:
2325
/// * Keys are fixed length binary hashes with expected normal distribution.
2426
/// * Values are buffers of the same size, specified at creation time.
@@ -59,9 +61,13 @@ class OnDiskKeyValueDB {
5961
/// \param KeySize Size for the key hash bytes.
6062
/// \param ValueName Identifier name for the values.
6163
/// \param ValueSize Size for the value bytes.
64+
/// \param UnifiedCache An optional UnifiedOnDiskCache that manages the size
65+
/// and lifetime of the CAS instance; if provided, it must own this
66+
/// KeyValueDB once it has been initialized.
6267
static Expected<std::unique_ptr<OnDiskKeyValueDB>>
6368
open(StringRef Path, StringRef HashName, unsigned KeySize,
6469
StringRef ValueName, size_t ValueSize,
70+
UnifiedOnDiskCache *UnifiedCache = nullptr,
6571
std::shared_ptr<OnDiskCASLogger> Logger = nullptr);
6672

6773
using CheckValueT =
@@ -71,11 +77,14 @@ class OnDiskKeyValueDB {
7177
Error validate(CheckValueT CheckValue) const;
7278

7379
private:
74-
OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache)
75-
: ValueSize(ValueSize), Cache(std::move(Cache)) {}
80+
OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache,
81+
UnifiedOnDiskCache *UnifiedCache)
82+
: ValueSize(ValueSize), Cache(std::move(Cache)),
83+
UnifiedCache(UnifiedCache) {}
7684

7785
const size_t ValueSize;
7886
OnDiskTrieRawHashMap Cache;
87+
UnifiedOnDiskCache *UnifiedCache = nullptr;
7988
};
8089

8190
} // namespace llvm::cas::ondisk

llvm/include/llvm/CAS/UnifiedOnDiskCache.h

Lines changed: 14 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===- UnifiedOnDiskCache.h -------------------------------------*- C++ -*-===//
1+
//===----------------------------------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
@@ -33,7 +33,7 @@ class OnDiskKeyValueDB;
3333
/// Usage patterns should be that an instance of \p UnifiedOnDiskCache is open
3434
/// for a limited period of time, e.g. for the duration of a build operation.
3535
/// For long-living processes that need periodic access to a
36-
/// \p UnifiedOnDiskCache, the client should device a scheme where access is
36+
/// \p UnifiedOnDiskCache, the client should devise a scheme where access is
3737
/// performed within some defined period. For example, if a service is designed
3838
/// to continuously wait for requests that access a \p UnifiedOnDiskCache, it
3939
/// could keep the instance alive while new requests are coming in but close it
@@ -43,28 +43,8 @@ class UnifiedOnDiskCache {
4343
/// The \p OnDiskGraphDB instance for the open directory.
4444
OnDiskGraphDB &getGraphDB() { return *PrimaryGraphDB; }
4545

46-
/// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key.
47-
///
48-
/// \param Key the hash bytes for the key.
49-
/// \param Value the \p ObjectID value.
50-
///
51-
/// \returns the \p ObjectID associated with the \p Key. It may be different
52-
/// than \p Value if another value was already associated with this key.
53-
Expected<ObjectID> KVPut(ArrayRef<uint8_t> Key, ObjectID Value);
54-
55-
/// Associate an \p ObjectID, of the \p OnDiskGraphDB instance, with a key.
56-
/// An \p ObjectID as a key is equivalent to its digest bytes.
57-
///
58-
/// \param Key the \p ObjectID for the key.
59-
/// \param Value the \p ObjectID value.
60-
///
61-
/// \returns the \p ObjectID associated with the \p Key. It may be different
62-
/// than \p Value if another value was already associated with this key.
63-
Expected<ObjectID> KVPut(ObjectID Key, ObjectID Value);
64-
65-
/// \returns the \p ObjectID, of the \p OnDiskGraphDB instance, associated
66-
/// with the \p Key, or \p std::nullopt if the key does not exist.
67-
Expected<std::optional<ObjectID>> KVGet(ArrayRef<uint8_t> Key);
46+
/// The \p OnDiskKeyValueDB instance for the open directory.
47+
OnDiskKeyValueDB &getKeyValueDB() { return *PrimaryKVDB; }
6848

6949
/// Open a \p UnifiedOnDiskCache instance for a directory.
7050
///
@@ -150,18 +130,23 @@ class UnifiedOnDiskCache {
150130
static Error collectGarbage(StringRef Path,
151131
ondisk::OnDiskCASLogger *Logger = nullptr);
152132

133+
/// Remove unused data from the current UnifiedOnDiskCache.
153134
Error collectGarbage();
154135

155-
~UnifiedOnDiskCache();
136+
/// Helper to convert a value stored in the KeyValueDB into an ObjectID.
137+
static ObjectID getObjectIDFromValue(ArrayRef<char> Value);
156138

157-
Error validateActionCache();
139+
using ValueBytes = std::array<char, sizeof(uint64_t)>;
140+
static ValueBytes getValueFromObjectID(ObjectID ID);
158141

159-
OnDiskGraphDB *getUpstreamGraphDB() const { return UpstreamGraphDB; }
142+
~UnifiedOnDiskCache();
160143

161144
private:
145+
friend class OnDiskGraphDB;
146+
friend class OnDiskKeyValueDB;
162147
UnifiedOnDiskCache();
163148

164-
Expected<std::optional<ObjectID>>
149+
Expected<std::optional<ArrayRef<char>>>
165150
faultInFromUpstreamKV(ArrayRef<uint8_t> Key);
166151

167152
/// \returns the storage size of the primary directory.
@@ -175,7 +160,7 @@ class UnifiedOnDiskCache {
175160
std::atomic<bool> NeedsGarbageCollection;
176161
std::string PrimaryDBDir;
177162

178-
OnDiskGraphDB *UpstreamGraphDB = nullptr;
163+
std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB;
179164
std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
180165

181166
std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;

llvm/lib/CAS/ActionCaches.cpp

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,13 @@
1313
#include "BuiltinCAS.h"
1414
#include "llvm/ADT/TrieRawHashMap.h"
1515
#include "llvm/CAS/ActionCache.h"
16-
#include "llvm/CAS/ObjectStore.h"
1716
#include "llvm/CAS/OnDiskCASLogger.h"
18-
#include "llvm/CAS/OnDiskGraphDB.h"
1917
#include "llvm/CAS/OnDiskKeyValueDB.h"
2018
#include "llvm/CAS/UnifiedOnDiskCache.h"
2119
#include "llvm/Config/llvm-config.h"
22-
#include "llvm/Support/Alignment.h"
2320
#include "llvm/Support/BLAKE3.h"
2421
#include "llvm/Support/Compiler.h"
22+
#include "llvm/Support/Errc.h"
2523
#include "llvm/Support/Path.h"
2624

2725
#define DEBUG_TYPE "cas-action-caches"
@@ -67,6 +65,7 @@ class InMemoryActionCache final : public ActionCache {
6765
InMemoryCacheT Cache;
6866
};
6967

68+
/// Builtin basic OnDiskActionCache that uses one underlying OnDiskKeyValueDB.
7069
class OnDiskActionCache final : public ActionCache {
7170
public:
7271
Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
@@ -87,6 +86,8 @@ class OnDiskActionCache final : public ActionCache {
8786
using DataT = CacheEntry<sizeof(HashType)>;
8887
};
8988

89+
/// Builtin unified ActionCache that wraps around UnifiedOnDiskCache to provide
90+
/// access to its ActionCache.
9091
class UnifiedOnDiskActionCache final : public ActionCache {
9192
public:
9293
Error putImpl(ArrayRef<uint8_t> ActionKey, const CASID &Result,
@@ -118,7 +119,8 @@ static Error createResultCachePoisonedError(ArrayRef<uint8_t> KeyHash,
118119
}
119120

120121
Expected<std::optional<CASID>>
121-
InMemoryActionCache::getImpl(ArrayRef<uint8_t> Key, bool /*CanBeDistributed*/) const {
122+
InMemoryActionCache::getImpl(ArrayRef<uint8_t> Key,
123+
bool /*CanBeDistributed*/) const {
122124
auto Result = Cache.find(Key);
123125
if (!Result)
124126
return std::nullopt;
@@ -169,17 +171,18 @@ OnDiskActionCache::create(StringRef AbsPath) {
169171
ondisk::OnDiskCASLogger::openIfEnabled(AbsPath).moveInto(Logger))
170172
return std::move(E);
171173
std::unique_ptr<ondisk::OnDiskKeyValueDB> DB;
172-
if (Error E = ondisk::OnDiskKeyValueDB::open(AbsPath, getHashName(),
173-
sizeof(HashType), getHashName(),
174-
sizeof(DataT), std::move(Logger))
174+
if (Error E = ondisk::OnDiskKeyValueDB::open(
175+
AbsPath, getHashName(), sizeof(HashType), getHashName(),
176+
sizeof(DataT), /*UnifiedCache=*/nullptr, std::move(Logger))
175177
.moveInto(DB))
176178
return std::move(E);
177179
return std::unique_ptr<OnDiskActionCache>(
178180
new OnDiskActionCache(std::move(DB)));
179181
}
180182

181183
Expected<std::optional<CASID>>
182-
OnDiskActionCache::getImpl(ArrayRef<uint8_t> Key, bool /*CanBeDistributed*/) const {
184+
OnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
185+
bool /*CanBeDistributed*/) const {
183186
std::optional<ArrayRef<char>> Val;
184187
if (Error E = DB->get(Key).moveInto(Val))
185188
return std::move(E);
@@ -218,13 +221,14 @@ UnifiedOnDiskActionCache::UnifiedOnDiskActionCache(
218221
Expected<std::optional<CASID>>
219222
UnifiedOnDiskActionCache::getImpl(ArrayRef<uint8_t> Key,
220223
bool /*CanBeDistributed*/) const {
221-
std::optional<ondisk::ObjectID> Val;
222-
if (Error E = UniDB->KVGet(Key).moveInto(Val))
224+
std::optional<ArrayRef<char>> Val;
225+
if (Error E = UniDB->getKeyValueDB().get(Key).moveInto(Val))
223226
return std::move(E);
224227
if (!Val)
225228
return std::nullopt;
229+
auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Val);
226230
return CASID::create(&getContext(),
227-
toStringRef(UniDB->getGraphDB().getDigest(*Val)));
231+
toStringRef(UniDB->getGraphDB().getDigest(ID)));
228232
}
229233

230234
Error UnifiedOnDiskActionCache::putImpl(ArrayRef<uint8_t> Key,
@@ -233,20 +237,35 @@ Error UnifiedOnDiskActionCache::putImpl(ArrayRef<uint8_t> Key,
233237
auto Expected = UniDB->getGraphDB().getReference(Result.getHash());
234238
if (LLVM_UNLIKELY(!Expected))
235239
return Expected.takeError();
236-
std::optional<ondisk::ObjectID> Observed;
237-
if (Error E = UniDB->KVPut(Key, *Expected).moveInto(Observed))
240+
241+
auto Value = ondisk::UnifiedOnDiskCache::getValueFromObjectID(*Expected);
242+
std::optional<ArrayRef<char>> Observed;
243+
if (Error E = UniDB->getKeyValueDB().put(Key, Value).moveInto(Observed))
238244
return E;
239245

240-
if (*Expected == Observed)
246+
auto ObservedID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(*Observed);
247+
if (*Expected == ObservedID)
241248
return Error::success();
242249

243250
return createResultCachePoisonedError(
244-
Key, getContext(), Result,
245-
UniDB->getGraphDB().getDigest(*Observed));
251+
Key, getContext(), Result, UniDB->getGraphDB().getDigest(ObservedID));
246252
}
247253

248254
Error UnifiedOnDiskActionCache::validate() const {
249-
return UniDB->validateActionCache();
255+
auto ValidateRef = [](FileOffset Offset, ArrayRef<char> Value) -> Error {
256+
auto ID = ondisk::UnifiedOnDiskCache::getObjectIDFromValue(Value);
257+
auto formatError = [&](Twine Msg) {
258+
return createStringError(
259+
llvm::errc::illegal_byte_sequence,
260+
"bad record at 0x" +
261+
utohexstr((unsigned)Offset.get(), /*LowerCase=*/true) + ": " +
262+
Msg.str());
263+
};
264+
if (ID.getOpaqueData() == 0)
265+
return formatError("zero is not a valid ref");
266+
return Error::success();
267+
};
268+
return UniDB->getKeyValueDB().validate(ValidateRef);
250269
}
251270

252271
Expected<std::unique_ptr<ActionCache>>

llvm/lib/CAS/BuiltinUnifiedCASDatabases.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
//===- BuiltinUnifiedCASDatabases.cpp ---------------------------*- C++ -*-===//
1+
//===----------------------------------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
@@ -35,4 +35,4 @@ Expected<ValidationResult> cas::validateOnDiskUnifiedCASDatabasesIfNeeded(
3535
#else
3636
return createStringError(inconvertibleErrorCode(), "OnDiskCache is disabled");
3737
#endif
38-
}
38+
}

0 commit comments

Comments
 (0)