Skip to content

Commit

Permalink
Revert D65740612: Use Velox fs for ssd cache evictlog file
Browse files Browse the repository at this point in the history
Differential Revision:
D65740612

Original commit changeset: 4c6ccac2f0e8

Original Phabricator Diff: D65740612

fbshipit-source-id: dd5f8564c1697a231e88713b7a1e32d8aac951d3
  • Loading branch information
Wenbin Lin authored and facebook-github-bot committed Nov 18, 2024
1 parent a8dd147 commit faf5951
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 244 deletions.
118 changes: 63 additions & 55 deletions velox/common/caching/SsdFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,6 @@ DEFINE_bool(ssd_verify_write, false, "Read back data after writing to SSD");
namespace facebook::velox::cache {

namespace {

// TODO: Remove this function once we migrate all files to velox fs.
//
// Disable 'copy on write' on the given file. Will throw if failed for any
// reason, including file system not supporting cow feature.
void disableCow(int32_t fd) {
Expand All @@ -69,6 +66,28 @@ void disableCow(int32_t fd) {
#endif // linux
}

// TODO: Remove this function once we migrate all files to velox fs.
void disableFileCow(int32_t fd) {
#ifdef linux
int attr{0};
auto res = ioctl(fd, FS_IOC_GETFLAGS, &attr);
VELOX_CHECK_EQ(
0,
res,
"ioctl(FS_IOC_GETFLAGS) failed: {}, {}",
res,
folly::errnoStr(errno));
attr |= FS_NOCOW_FL;
res = ioctl(fd, FS_IOC_SETFLAGS, &attr);
VELOX_CHECK_EQ(
0,
res,
"ioctl(FS_IOC_SETFLAGS, FS_NOCOW_FL) failed: {}, {}",
res,
folly::errnoStr(errno));
#endif // linux
}

void addEntryToIovecs(AsyncDataCacheEntry& entry, std::vector<iovec>& iovecs) {
if (entry.tinyData() != nullptr) {
iovecs.push_back({entry.tinyData(), static_cast<size_t>(entry.size())});
Expand Down Expand Up @@ -335,7 +354,7 @@ bool SsdFile::growOrEvictLocked() {
}
}

auto candidates =
const auto candidates =
tracker_.findEvictionCandidates(3, numRegions_, regionPins_);
if (candidates.empty()) {
suspended_ = true;
Expand Down Expand Up @@ -657,49 +676,44 @@ bool SsdFile::removeFileEntries(
return true;
}

void SsdFile::logEviction(std::vector<int32_t>& regions) {
if (!checkpointEnabled()) {
return;
}
const auto length = regions.size() * sizeof(regions[0]);
const std::vector<iovec> iovecs = {{regions.data(), length}};
try {
evictLogWriteFile_->write(iovecs, 0, static_cast<int64_t>(length));
} catch (const std::exception& e) {
++stats_.writeSsdErrors;
VELOX_SSD_CACHE_LOG(ERROR) << "Failed to log eviction: " << e.what();
void SsdFile::logEviction(const std::vector<int32_t>& regions) {
if (checkpointEnabled()) {
const int32_t rc = ::write(
evictLogFd_, regions.data(), regions.size() * sizeof(regions[0]));
if (rc != regions.size() * sizeof(regions[0])) {
checkpointError(rc, "Failed to log eviction");
}
}
}

void SsdFile::deleteCheckpoint(bool keepLog) {
if (checkpointDeleted_) {
return;
}

if (evictLogWriteFile_ != nullptr) {
try {
if (keepLog) {
evictLogWriteFile_->truncate(0);
evictLogWriteFile_->flush();
} else {
evictLogWriteFile_->close();
fs_->remove(getEvictLogFilePath());
evictLogWriteFile_.reset();
}
} catch (const std::exception& e) {
++stats_.deleteCheckpointErrors;
VELOX_SSD_CACHE_LOG(ERROR) << "Error in deleting evictLog: " << e.what();
if (evictLogFd_ >= 0) {
if (keepLog) {
::lseek(evictLogFd_, 0, SEEK_SET);
::ftruncate(evictLogFd_, 0);
::fsync(evictLogFd_);
} else {
::close(evictLogFd_);
evictLogFd_ = -1;
}
}

checkpointDeleted_ = true;
const auto logPath = getEvictLogFilePath();
int32_t logRc = 0;
if (!keepLog) {
logRc = ::unlink(logPath.c_str());
}
const auto checkpointPath = getCheckpointFilePath();
const auto checkpointRc = ::unlink(checkpointPath.c_str());
if (checkpointRc != 0) {
VELOX_SSD_CACHE_LOG(ERROR)
<< "Error in deleting checkpoint: " << checkpointRc;
}
if (checkpointRc != 0) {
if ((logRc != 0) || (checkpointRc != 0)) {
++stats_.deleteCheckpointErrors;
VELOX_SSD_CACHE_LOG(ERROR)
<< "Error in deleting log and checkpoint. log: " << logRc
<< " checkpoint: " << checkpointRc;
}
}

Expand Down Expand Up @@ -837,9 +851,8 @@ void SsdFile::checkpoint(bool force) {
// NOTE: we shall truncate eviction log after checkpoint file sync
// completes so that we never recover from an old checkpoint file without
// log evictions. The latter might lead to data consistent issue.
VELOX_CHECK_NOT_NULL(evictLogWriteFile_);
evictLogWriteFile_->truncate(0);
evictLogWriteFile_->flush();
checkRc(::ftruncate(evictLogFd_, 0), "Truncate of event log");
checkRc(::fsync(evictLogFd_), "Sync of evict log");

VELOX_SSD_CACHE_LOG(INFO)
<< "Checkpoint persisted with " << entries_.size() << " cache entries";
Expand Down Expand Up @@ -870,14 +883,18 @@ void SsdFile::initializeCheckpoint() {
getCheckpointFilePath());
}
const auto logPath = getEvictLogFilePath();
filesystems::FileOptions evictLogFileOptions;
evictLogFileOptions.shouldThrowOnFileAlreadyExists = false;
try {
evictLogWriteFile_ = fs_->openFileForWrite(logPath, evictLogFileOptions);
} catch (std::exception& e) {
evictLogFd_ = ::open(logPath.c_str(), O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
if (disableFileCow_) {
disableCow(evictLogFd_);
}
if (evictLogFd_ < 0) {
++stats_.openLogErrors;
// Failure to open the log at startup is a process terminating error.
VELOX_FAIL("Could not open evict log {}: {}", logPath, e.what());
VELOX_FAIL(
"Could not open evict log {}, rc {}: {}",
logPath,
evictLogFd_,
folly::errnoStr(errno));
}

try {
Expand Down Expand Up @@ -948,9 +965,6 @@ void SsdFile::disableFileCow() {
const std::unordered_map<std::string, std::string> attributes = {
{std::string(LocalWriteFile::Attributes::kNoCow), "true"}};
writeFile_->setAttributes(attributes);
if (evictLogWriteFile_ != nullptr) {
evictLogWriteFile_->setAttributes(attributes);
}
#endif // linux
}

Expand Down Expand Up @@ -1007,16 +1021,10 @@ void SsdFile::readCheckpoint(std::ifstream& state) {
idMap[id] = StringIdLease(fileIds(), id, name);
}

const auto logPath = getEvictLogFilePath();
const auto evictLogReadFile = fs_->openFileForRead(logPath);
const auto logSize = evictLogReadFile->size();
const auto logSize = ::lseek(evictLogFd_, 0, SEEK_END);
std::vector<uint32_t> evicted(logSize / sizeof(uint32_t));
try {
evictLogReadFile->pread(0, logSize, evicted.data());
} catch (const std::exception& e) {
++stats_.readCheckpointErrors;
VELOX_FAIL("Failed to read eviction log: {}", e.what());
}
const auto rc = ::pread(evictLogFd_, evicted.data(), logSize, 0);
VELOX_CHECK_EQ(logSize, rc, "Failed to read eviction log");
std::unordered_set<uint32_t> evictedMap;
for (auto region : evicted) {
evictedMap.insert(region);
Expand Down
16 changes: 5 additions & 11 deletions velox/common/caching/SsdFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,13 +367,7 @@ class SsdFile {

/// Returns the checkpoint file path.
std::string getCheckpointFilePath() const {
// Faulty file path needs to be handled manually before we switch checkpoint
// file to Velox filesystem.
const std::string faultyPrefix = "faulty:";
std::string checkpointPath = fileName_ + kCheckpointExtension;
return checkpointPath.find(faultyPrefix) == 0
? checkpointPath.substr(faultyPrefix.size())
: checkpointPath;
return fileName_ + kCheckpointExtension;
}

/// Deletes the backing file. Used in testing.
Expand Down Expand Up @@ -483,7 +477,7 @@ class SsdFile {

// Synchronously logs that 'regions' are no longer valid in a possibly
// existing checkpoint.
void logEviction(std::vector<int32_t>& regions);
void logEviction(const std::vector<int32_t>& regions);

// Computes the checksum of data in cache 'entry'.
uint32_t checksumEntry(const AsyncDataCacheEntry& entry) const;
Expand Down Expand Up @@ -578,9 +572,6 @@ class SsdFile {
// WriteFile for cache data file.
std::unique_ptr<WriteFile> writeFile_;

// WriteFile for evict log file.
std::unique_ptr<WriteFile> evictLogWriteFile_;

// Counters.
SsdCacheStats stats_;

Expand All @@ -594,6 +585,9 @@ class SsdFile {
// Count of bytes written after last checkpoint.
std::atomic<uint64_t> bytesAfterCheckpoint_{0};

// fd for logging evictions.
int32_t evictLogFd_{-1};

// True if there was an error with checkpoint and the checkpoint was deleted.
bool checkpointDeleted_{false};
};
Expand Down
1 change: 0 additions & 1 deletion velox/common/caching/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ target_link_libraries(
PRIVATE
velox_caching
velox_file
velox_file_test_utils
velox_memory
velox_temp_path
Folly::folly
Expand Down
Loading

0 comments on commit faf5951

Please sign in to comment.