Skip to content

Commit

Permalink
fix: Remove the fdallocate in ssd file space allocation (#11690)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #11690

fdallocate left brtfs in unknown state and caused the followup operations failed like disable cow. We need to figure out
why disable cow fails after that by checking with brtfs expert. This PR remove the fdallocate before we figure out the root cause.

Reviewed By: zacw7

Differential Revision: D66589915

fbshipit-source-id: 798e146f9709014e5a0b20a9f2ddb39b9f094592
  • Loading branch information
xiaoxmeng authored and facebook-github-bot committed Nov 29, 2024
1 parent feb5b75 commit 281fb04
Show file tree
Hide file tree
Showing 7 changed files with 13 additions and 60 deletions.
7 changes: 0 additions & 7 deletions velox/common/base/Counters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,13 +265,6 @@ void registerVeloxMetrics() {
DEFINE_METRIC(
kMetricSsdCacheRecoveredEntries, facebook::velox::StatType::SUM);

// Total number of local file space allocation failures.
//
// NOTE: space allocation is attempted by fallocate wherever it is supported.
DEFINE_METRIC(
kMetricLocalFileSpaceAllocationFailuresCount,
facebook::velox::StatType::COUNT);

/// ================== Memory Arbitration Counters =================

// The number of arbitration requests.
Expand Down
3 changes: 0 additions & 3 deletions velox/common/base/Counters.h
Original file line number Diff line number Diff line change
Expand Up @@ -328,9 +328,6 @@ constexpr folly::StringPiece kMetricSsdCacheRegionsEvicted{
constexpr folly::StringPiece kMetricSsdCacheRecoveredEntries{
"velox.ssd_cache_recovered_entries"};

constexpr folly::StringPiece kMetricLocalFileSpaceAllocationFailuresCount{
"velox.local_file_space_allocation_failures_count"};

constexpr folly::StringPiece kMetricExchangeDataTimeMs{
"velox.exchange_data_time_ms"};

Expand Down
21 changes: 9 additions & 12 deletions velox/common/caching/SsdFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,15 +150,11 @@ SsdFile::SsdFile(const Config& config)
writeFile_ = fs_->openFileForWrite(fileName_, fileOptions);
readFile_ = fs_->openFileForRead(fileName_);

// NOTE: checkpoint recovery will set 'numRegions_' and 'dataSize_'
// accordingly.
numRegions_ = 0;
dataSize_ = 0;

const auto maxFileSize = kRegionSize * maxRegions_;
if (writeFile_->size() != maxFileSize) {
// Initialize and pre-allocate (if possible) the data file with fixed space.
writeFile_->truncate(static_cast<int64_t>(maxFileSize));
const uint64_t size = writeFile_->size();
numRegions_ = std::min<int32_t>(size / kRegionSize, maxRegions_);
fileSize_ = numRegions_ * kRegionSize;
if ((size % kRegionSize > 0) || (size > numRegions_ * kRegionSize)) {
writeFile_->truncate(fileSize_);
}
// The existing regions in the file are writable.
writableRegions_.resize(numRegions_);
Expand Down Expand Up @@ -319,8 +315,10 @@ std::optional<std::pair<uint64_t, int32_t>> SsdFile::getSpace(
bool SsdFile::growOrEvictLocked() {
process::TraceContext trace("SsdFile::growOrEvictLocked");
if (numRegions_ < maxRegions_) {
const auto newSize = (numRegions_ + 1) * kRegionSize;
try {
dataSize_ = (numRegions_ + 1) * kRegionSize;
writeFile_->truncate(newSize);
fileSize_ = newSize;
writableRegions_.push_back(numRegions_);
regionSizes_[numRegions_] = 0;
erasedRegionSizes_[numRegions_] = 0;
Expand Down Expand Up @@ -431,7 +429,7 @@ void SsdFile::write(std::vector<CachePin>& pins) {
writeOffset += writeLength;
writeLength = 0;
}
VELOX_CHECK_GE(dataSize_, writeOffset);
VELOX_CHECK_GE(fileSize_, writeOffset);

{
std::lock_guard<std::shared_mutex> l(mutex_);
Expand Down Expand Up @@ -969,7 +967,6 @@ void SsdFile::readCheckpoint(std::ifstream& state) {
maxRegions_,
"Trying to start from checkpoint with a different capacity");
numRegions_ = readNumber<int32_t>(state);
dataSize_ = numRegions_ * kRegionSize;
std::vector<double> scores(maxRegions);
state.read(asChar(scores.data()), maxRegions_ * sizeof(double));
std::unordered_map<uint64_t, StringIdLease> idMap;
Expand Down
4 changes: 2 additions & 2 deletions velox/common/caching/SsdFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -553,8 +553,8 @@ class SsdFile {
// File system.
std::shared_ptr<filesystems::FileSystem> fs_;

// The size of actual cached data in bytes. Must be multiple of kRegionSize.
uint64_t dataSize_{0};
// Size of the backing file in bytes. Must be multiple of kRegionSize.
uint64_t fileSize_{0};

// ReadFile for cache data file.
std::unique_ptr<ReadFile> readFile_;
Expand Down
8 changes: 0 additions & 8 deletions velox/common/caching/tests/AsyncDataCacheTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,14 +146,6 @@ class AsyncDataCacheTest : public ::testing::TestWithParam<TestParam> {
ssdCacheHelper_ =
std::make_unique<test::SsdCacheTestHelper>(ssdCache.get());
ASSERT_EQ(ssdCacheHelper_->numShards(), kNumSsdShards);
const auto sizeQuantum = kNumSsdShards * SsdFile::kRegionSize;
const auto maxNumRegions = static_cast<int32_t>(
bits::roundUp(config.maxBytes, sizeQuantum) / sizeQuantum);
for (int32_t i = 0; i < kNumSsdShards; ++i) {
ASSERT_EQ(
ssdCacheHelper_->writeFileSize(i),
maxNumRegions * SsdFile::kRegionSize);
}
}
}

Expand Down
9 changes: 2 additions & 7 deletions velox/common/caching/tests/SsdFileTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,10 @@ class SsdFileTest : public testing::Test {
bool checksumEnabled = false,
bool checksumReadVerificationEnabled = false,
bool disableFileCow = false) {
const auto maxNumRegions = static_cast<int32_t>(
bits::roundUp(ssdBytes, SsdFile::kRegionSize) / SsdFile::kRegionSize);
SsdFile::Config config(
fmt::format("{}/ssdtest", tempDirectory_->getPath()),
0, // shardId
maxNumRegions,
bits::roundUp(ssdBytes, SsdFile::kRegionSize) / SsdFile::kRegionSize,
checkpointIntervalBytes,
disableFileCow,
checksumEnabled,
Expand All @@ -107,9 +105,6 @@ class SsdFileTest : public testing::Test {
if (ssdFile_ != nullptr) {
ssdFileHelper_ =
std::make_unique<test::SsdFileTestHelper>(ssdFile_.get());
ASSERT_EQ(
ssdFileHelper_->writeFileSize(),
maxNumRegions * ssdFile_->kRegionSize);
}
}

Expand Down Expand Up @@ -666,7 +661,7 @@ TEST_F(SsdFileTest, recoverFromCheckpointWithChecksum) {
ASSERT_EQ(statsAfterRecover.entriesCached, stats.entriesCached);
} else {
ASSERT_EQ(statsAfterRecover.bytesCached, 0);
ASSERT_EQ(statsAfterRecover.regionsCached, 0);
ASSERT_EQ(statsAfterRecover.regionsCached, stats.regionsCached);
ASSERT_EQ(statsAfterRecover.entriesCached, 0);
}

Expand Down
21 changes: 0 additions & 21 deletions velox/common/file/File.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,7 @@
*/

#include "velox/common/file/File.h"
#include "velox/common/base/Counters.h"
#include "velox/common/base/Fs.h"
#include "velox/common/base/StatsReporter.h"

#include <fmt/format.h>
#include <glog/logging.h>
Expand Down Expand Up @@ -379,25 +377,6 @@ void LocalWriteFile::write(
void LocalWriteFile::truncate(int64_t newSize) {
checkNotClosed(closed_);
VELOX_CHECK_GE(newSize, 0, "New size cannot be negative.");
#ifdef linux
if (newSize > size_) {
// Use fallocate to extend the file.
const auto ret = ::fallocate(fd_, 0, 0, newSize);
try {
VELOX_CHECK_EQ(
ret,
0,
"fallocate failed in LocalWriteFile::truncate: {}.",
folly::errnoStr(errno));
size_ = newSize;
return;
} catch (const std::exception& /*e*/) {
RECORD_METRIC_VALUE(kMetricLocalFileSpaceAllocationFailuresCount);
}
}
#endif // linux

// Fallback to ftruncate.
const auto ret = ::ftruncate(fd_, newSize);
VELOX_CHECK_EQ(
ret,
Expand Down

0 comments on commit 281fb04

Please sign in to comment.