diff --git a/presto-native-execution/presto_cpp/main/LinuxMemoryChecker.cpp b/presto-native-execution/presto_cpp/main/LinuxMemoryChecker.cpp index 13f2999375628..1f630ee8a8055 100644 --- a/presto-native-execution/presto_cpp/main/LinuxMemoryChecker.cpp +++ b/presto-native-execution/presto_cpp/main/LinuxMemoryChecker.cpp @@ -18,6 +18,7 @@ #include #include "presto_cpp/main/PeriodicMemoryChecker.h" #include "presto_cpp/main/common/Configs.h" +#include "presto_cpp/main/common/Utils.h" namespace facebook::presto { @@ -29,13 +30,32 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker { // it's mounted. struct stat buffer; if ((stat(kCgroupV1Path, &buffer) == 0)) { - statFile_ = kCgroupV1Path; - } else if ((stat(kCgroupV2Path, &buffer) == 0)) { - statFile_ = kCgroupV2Path; - } else { - statFile_ = "None"; + PRESTO_STARTUP_LOG(INFO) << "Using cgroup v1."; + if (stat(kCgroupV1MemStatFile, &buffer) == 0) { + memStatFile_ = kCgroupV1MemStatFile; + } + if ((stat(kCgroupV1MaxMemFile, &buffer) == 0)) { + memMaxFile_ = kCgroupV1MaxMemFile; + } } - LOG(INFO) << fmt::format("Using memory stat file {}", statFile_); + + // In cgroup v2. + else { + PRESTO_STARTUP_LOG(INFO) << "Using cgroup v2."; + if (stat(kCgroupV2MemStatFile, &buffer) == 0) { + memStatFile_ = kCgroupV2MemStatFile; + } + if ((stat(kCgroupV2MaxMemFile, &buffer) == 0)) { + memMaxFile_ = kCgroupV2MaxMemFile; + } + } + + PRESTO_STARTUP_LOG(INFO) << fmt::format( + "Using memory stat file: {}", + memStatFile_.empty() ? memInfoFile_ : memStatFile_); + PRESTO_STARTUP_LOG(INFO) << fmt::format( + "Using memory max file {}", + memMaxFile_.empty() ? memInfoFile_ : memMaxFile_); } ~LinuxMemoryChecker() override {} @@ -45,8 +65,90 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker { } void setStatFile(std::string statFile) { - statFile_ = statFile; - LOG(INFO) << fmt::format("Changed to using memory stat file {}", statFile_); + memStatFile_ = statFile; + LOG(INFO) << fmt::format( + "Changed to using memory stat file {}", memStatFile_); + } + + void setMemMaxFile(const std::string& memMaxFile) { + memMaxFile_ = memMaxFile; + LOG(INFO) << fmt::format( + "Changed to using memory max file {}", memMaxFile_); + } + + void setMemInfoFile(const std::string& memInfoFile) { + memInfoFile_ = memInfoFile; + LOG(INFO) << fmt::format("Changed to using meminfo file {}", memInfoFile_); + } + + void start() override { + // Check system-memory-gb < system-mem-limit-gb < memory limit for process. + auto* systemConfig = SystemConfig::instance(); + int64_t systemMemoryInBytes = systemConfig->systemMemoryGb() << 30; + PRESTO_STARTUP_LOG(INFO) + << fmt::format("System memory in bytes: {}", systemMemoryInBytes); + + PRESTO_STARTUP_LOG(INFO) << fmt::format( + "System memory limit in bytes: {}", config_.systemMemLimitBytes); + + auto memoryLimitForProcess = getMemoryLimitForProcess(); + PRESTO_STARTUP_LOG(INFO) << fmt::format( + "Memory limit for process in bytes: {}", memoryLimitForProcess); + + VELOX_CHECK_LE( + config_.systemMemLimitBytes, + memoryLimitForProcess, + "system memory limit = {} bytes is higher than the memory limit for process = {} bytes.", + config_.systemMemLimitBytes, + memoryLimitForProcess); + + if (config_.systemMemLimitBytes < systemMemoryInBytes) { + LOG(WARNING) << "system-mem-limit-gb is smaller than system-memory-gb. " + << "Expected: system-mem-limit-gb >= system-memory-gb."; + } + + PeriodicMemoryChecker::start(); + } + + int64_t getMemoryLimitForProcess() { + // Set the memory limit for process to be the smaller number between + // /proc/meminfo and memMaxFile_. + int64_t memoryLimitForProcess = 0; + // meminfo's units is in kB. + folly::gen::byLine(memInfoFile_.c_str()) | + [&](const folly::StringPiece& line) -> void { + if (memoryLimitForProcess != 0) { + return; + } + memoryLimitForProcess = static_cast( + extractNumericConfigValueWithRegex(line, kMemTotalRegex) * 1024); + }; + + // For cgroup v1, memory.limit_in_bytes can default to a really big numeric + // value in bytes like 9223372036854771712 to represent that + // memory.limit_in_bytes is not set to a value. The default value here is + // set to PAGE_COUNTER_MAX, which is LONG_MAX/PAGE_SIZE on the 64-bit + // platform. The default value can vary based upon the platform's PAGE_SIZE. + // If memory.limit_in_bytes contains a really big numeric value, then we + // will use MemTotal from /proc/meminfo. + + // For cgroup v2, memory.max can contain a numeric value in bytes or string + // "max" which represents no value has been set. If memory.max contains + // "max", then we will use MemTotal from /proc/meminfo. + if (!memMaxFile_.empty()) { + folly::gen::byLine(memMaxFile_.c_str()) | + [&](const folly::StringPiece& line) -> void { + if (line == "max") { + return; + } + memoryLimitForProcess = + std::min(memoryLimitForProcess, folly::to(line)); + return; + }; + } + + // Unit is in bytes. + return memoryLimitForProcess; } protected: @@ -80,8 +182,8 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker { size_t inactiveAnon = 0; size_t activeAnon = 0; - if (statFile_ != "None") { - folly::gen::byLine(statFile_.c_str()) | + if (!memStatFile_.empty()) { + folly::gen::byLine(memStatFile_.c_str()) | [&](const folly::StringPiece& line) -> void { if (inactiveAnon == 0) { inactiveAnon = @@ -103,7 +205,7 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker { } // Last resort use host machine info. - folly::gen::byLine("/proc/meminfo") | + folly::gen::byLine(memInfoFile_.c_str()) | [&](const folly::StringPiece& line) -> void { if (memAvailable == 0) { memAvailable = @@ -143,10 +245,16 @@ class LinuxMemoryChecker : public PeriodicMemoryChecker { const boost::regex kInactiveAnonRegex{R"!(inactive_anon\s*(\d+)\s*)!"}; const boost::regex kActiveAnonRegex{R"!(active_anon\s*(\d+)\s*)!"}; const boost::regex kMemAvailableRegex{R"!(MemAvailable:\s*(\d+)\s*kB)!"}; - const boost::regex kMemTotalRegex{R"!(MemTotal:\s*(\d+)\s*kB)!"}; - const char* kCgroupV1Path = "/sys/fs/cgroup/memory/memory.stat"; - const char* kCgroupV2Path = "/sys/fs/cgroup/memory.stat"; - std::string statFile_; + const boost::regex kMemTotalRegex{R"!(MemTotal:\s*(\d+)\s+kB)!"}; + const char* kCgroupV1Path = "/sys/fs/cgroup/memory"; + const char* kCgroupV1MemStatFile = "/sys/fs/cgroup/memory/memory.stat"; + const char* kCgroupV2MemStatFile = "/sys/fs/cgroup/memory.stat"; + const char* kCgroupV1MaxMemFile = + "/sys/fs/cgroup/memory/memory.limit_in_bytes"; + const char* kCgroupV2MaxMemFile = "/sys/fs/cgroup/memory.max"; + std::string memInfoFile_ = "/proc/meminfo"; + std::string memStatFile_; + std::string memMaxFile_; size_t extractNumericConfigValueWithRegex( const folly::StringPiece& line, diff --git a/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.h b/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.h index d16fecdcf92e2..f894d75a0276a 100644 --- a/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.h +++ b/presto-native-execution/presto_cpp/main/PeriodicMemoryChecker.h @@ -71,10 +71,10 @@ class PeriodicMemoryChecker { /// Starts the 'PeriodicMemoryChecker'. A background scheduler will be /// launched to perform the checks. This should only be called once. - void start(); + virtual void start(); /// Stops the 'PeriodicMemoryChecker'. - void stop(); + virtual void stop(); protected: /// Returns current system memory usage. The returned value is used to compare diff --git a/presto-native-execution/presto_cpp/main/tests/LinuxMemoryCheckerTest.cpp b/presto-native-execution/presto_cpp/main/tests/LinuxMemoryCheckerTest.cpp index 595ee1cae94f9..e3d99ebd1d07a 100644 --- a/presto-native-execution/presto_cpp/main/tests/LinuxMemoryCheckerTest.cpp +++ b/presto-native-execution/presto_cpp/main/tests/LinuxMemoryCheckerTest.cpp @@ -17,29 +17,102 @@ #include #include "velox/common/base/VeloxException.h" #include "velox/common/base/tests/GTestUtils.h" +#include "velox/exec/tests/utils/TempFilePath.h" -namespace fs = boost::filesystem; - -namespace { -std::string getStatsFilePath(const std::string& fileName) { - return fs::current_path().string() + "/examples/" + fileName; -} -} // namespace +using namespace facebook::velox; namespace facebook::presto { class LinuxMemoryCheckerTest : public testing::Test { protected: - LinuxMemoryChecker memChecker; + LinuxMemoryChecker memChecker_; + + LinuxMemoryCheckerTest() : memChecker_(PeriodicMemoryChecker::Config{}) {} + + void checkMemoryLimitForProcess( + const std::string& content, + int64_t expectedMemoryMax) { + auto tempMemInfoFile = exec::test::TempFilePath::create(); + tempMemInfoFile->append(kMemInfoText_); + auto memInfoPath = tempMemInfoFile->getPath(); - LinuxMemoryCheckerTest() : memChecker(PeriodicMemoryChecker::Config{}) {} + auto tempTestFile = exec::test::TempFilePath::create(); + tempTestFile->append(content); + auto testFilePath = tempTestFile->getPath(); + + memChecker_.setMemInfoFile(memInfoPath); + memChecker_.setMemMaxFile(testFilePath); + ASSERT_EQ(memChecker_.getMemoryLimitForProcess(), expectedMemoryMax); + } void checkMemoryUsage( - const std::string& statFileName, + const std::string& content, int64_t expectedMemoryUsage) { - auto statFilePath = getStatsFilePath(statFileName); - memChecker.setStatFile(statFilePath); - ASSERT_EQ(memChecker.getUsedMemory(), expectedMemoryUsage); + auto tempTestFile = exec::test::TempFilePath::create(); + tempTestFile->append(content); + auto testFilePath = tempTestFile->getPath(); + + memChecker_.setStatFile(testFilePath); + ASSERT_EQ(memChecker_.getUsedMemory(), expectedMemoryUsage); } + + const std::string kMemInfoText_ = + "MemTotal: 129294272 kB\n" + "MemFree: 127334232 kB\n" + "MemAvailable: 127637400 kB\n" + "Buffers: 2948 kB\n" + "Cached: 1315676 kB\n" + "SwapCached: 0 kB\n" + "Active: 769056 kB\n" + "Inactive: 810360 kB\n" + "Active(anon): 277920 kB\n" + "Inactive(anon): 0 kB\n" + "Active(file): 491136 kB\n" + "Inactive(file): 810360 kB\n" + "Unevictable: 12 kB\n" + "Mlocked: 12 kB\n" + "SwapTotal: 0 kB\n" + "SwapFree: 0 kB\n" + "Zswap: 0 kB\n" + "Zswapped: 0 kB\n" + "Dirty: 4 kB\n" + "Writeback: 0 kB\n" + "AnonPages: 257672 kB\n" + "Mapped: 341044 kB\n" + "Shmem: 17128 kB\n" + "KReclaimable: 70060 kB\n" + "Slab: 172876 kB\n" + "SReclaimable: 70060 kB\n" + "SUnreclaim: 102816 kB\n" + "KernelStack: 6640 kB\n" + "PageTables: 5832 kB\n" + "SecPageTables: 0 kB\n" + "NFS_Unstable: 0 kB\n" + "Bounce: 0 kB\n" + "WritebackTmp: 0 kB\n" + "CommitLimit: 64647136 kB\n" + "Committed_AS: 1077692288 kB\n" + "VmallocTotal: 34359738367 kB\n" + "VmallocUsed: 22332 kB\n" + "VmallocChunk: 0 kB\n" + "Percpu: 7616 kB\n" + "HardwareCorrupted: 0 kB\n" + "AnonHugePages: 141312 kB\n" + "ShmemHugePages: 0 kB\n" + "ShmemPmdMapped: 0 kB\n" + "FileHugePages: 2048 kB\n" + "FilePmdMapped: 0 kB\n" + "CmaTotal: 0 kB\n" + "CmaFree: 0 kB\n" + "Unaccepted: 0 kB\n" + "HugePages_Total: 0\n" + "HugePages_Free: 0\n" + "HugePages_Rsvd: 0\n" + "HugePages_Surp: 0\n" + "Hugepagesize: 2048 kB\n" + "Hugetlb: 0 kB\n" + "DirectMap4k: 264100 kB\n" + "DirectMap2M: 9156608 kB\n" + "DirectMap1G: 122683392 kB\n"; }; TEST_F(LinuxMemoryCheckerTest, basic) { @@ -68,20 +141,192 @@ TEST_F(LinuxMemoryCheckerTest, basic) { ASSERT_NO_THROW(memChecker.stop()); } +TEST_F(LinuxMemoryCheckerTest, sysMemLimitBytesCheck) { + auto tempMemInfoFile = exec::test::TempFilePath::create(); + tempMemInfoFile->append(kMemInfoText_); + auto memInfoPath = tempMemInfoFile->getPath(); + + auto tempTestFile = exec::test::TempFilePath::create(); + tempTestFile->append("131000000000\n"); + auto testFilePath = tempTestFile->getPath(); + + // system-mem-limit-gb should be set less than or equal to + // the memory limit for process available. + // systemMemLimitBytes = 130,000,000,000 bytes. + // memory limit for process = 131,000,000,000 bytes. + LinuxMemoryChecker memChecker(PeriodicMemoryChecker::Config{ + 1'000, + true, + 130000000000, + 32, + true, + 5, + "/path/to/dir", + "prefix", + 5, + 512}); + memChecker.setMemInfoFile(memInfoPath); + memChecker.setMemMaxFile(testFilePath); + ASSERT_NO_THROW(memChecker.start()); + ASSERT_NO_THROW(memChecker.stop()); + + // systemMemLimitBytes = 131,000,000,001 bytes. + // memory limit for process = 131,000,000,000 bytes. + LinuxMemoryChecker memChecker2(PeriodicMemoryChecker::Config{ + 1'000, + true, + 131000000001, + 32, + true, + 5, + "/path/to/dir", + "prefix", + 5, + 512}); + memChecker2.setMemInfoFile(memInfoPath); + memChecker2.setMemMaxFile(testFilePath); + VELOX_ASSERT_THROW(memChecker2.start(), "(131000000001 vs. 131000000000)"); + VELOX_ASSERT_THROW(memChecker2.stop(), ""); +} + +TEST_F(LinuxMemoryCheckerTest, memory131gbMax) { + // Testing for cgroup v1 and v2. + // memory131gb.max is 131,000,000,000 bytes. + // meminfo is 132,397,334,528 bytes. + // The expected memory limit for process should be 131,000,000,000 bytes here. + checkMemoryLimitForProcess("131000000000\n", 131000000000); +} + +TEST_F(LinuxMemoryCheckerTest, memory133gbMax) { + // Testing for cgroup v1 and v2. + // memory133gb.max is 133,000,000,000 bytes. + // meminfo is 132,397,334,528 bytes. + // The expected memory limit for process should be 132,397,334,528 bytes here. + checkMemoryLimitForProcess("133000000000\n", 132397334528); +} + +TEST_F(LinuxMemoryCheckerTest, cgroupV1MemoryMaxNotSet) { + // Testing for cgroup v1. + // When memory.limit_in_bytes is not set to a value, it could default to + // a huge value like 9223372036854771712 bytes. + // The default value is set to PAGE_COUNTER_MAX, which is LONG_MAX/PAGE_SIZE + // on 64-bit platform. The default value can vary based upon the platform's + // PAGE_SIZE. + + // cgroupV1memoryNotSet.limit_in_bytes is 9,223,372,036,854,771,712 bytes. + // meminfo is 132,397,334,528 bytes. + // The expected memory limit for process should be 132,397,334,528 bytes here. + checkMemoryLimitForProcess("9223372036854771712\n", 132397334528); +} + +TEST_F(LinuxMemoryCheckerTest, cgroupV2MemoryMaxNotSet) { + // Testing for cgroup v2. + // When memory.max is not set to a value, it defaults to contain string "max". + + // cgroupV2memoryNotSet.max is "max". + // meminfo is 132,397,334,528 bytes. + // The expected memory limit for process should be 132,397,334,528 bytes here. + checkMemoryLimitForProcess("max\n", 132397334528); +} + TEST_F(LinuxMemoryCheckerTest, memoryStatFileV1) { // Testing cgroup v1 memory.stat file. - checkMemoryUsage("cgroupV1memory.stat", 5136384); + const std::string content = + "cache 39313408\n" + "rss 3600384\n" + "rss_huge 0\n" + "shmem 1757184\n" + "mapped_file 12705792\n" + "dirty 0\n" + "writeback 0\n" + "pgpgin 97614\n" + "pgpgout 87091\n" + "pgfault 55869\n" + "pgmajfault 132\n" + "inactive_anon 1486848\n" + "active_anon 3649536\n" + "inactive_file 20410368\n" + "active_file 11894784\n" + "unevictable 5406720\n" + "hierarchical_memory_limit 9223372036854771712\n" + "total_cache 239964160\n" + "total_rss 109146112\n" + "total_rss_huge 0\n" + "total_shmem 2420736\n" + "total_mapped_file 84344832\n" + "total_dirty 135168\n" + "total_writeback 0\n" + "total_pgpgin 291606\n" + "total_pgpgout 205533\n" + "total_pgfault 296666\n" + "total_pgmajfault 792\n" + "total_inactive_anon 1486848\n" + "total_active_anon 101105664\n" + "total_inactive_file 165715968\n" + "total_active_file 65421312\n" + "total_unevictable 18653184\n"; + checkMemoryUsage(content, 5136384); } TEST_F(LinuxMemoryCheckerTest, memoryStatFileV2) { // Testing cgroup v2 memory.stat file. - checkMemoryUsage("cgroupV2memory.stat", 274713448448); + const std::string content = + "anon 274528108544\n" + "file 578768896\n" + "kernel 565014528\n" + "kernel_stack 9388032\n" + "pagetables 543928320\n" + "percpu 14040\n" + "sock 102400\n" + "vmalloc 86016\n" + "shmem 0\n" + "zswap 0\n" + "zswapped 0\n" + "file_mapped 0\n" + "file_dirty 1142784\n" + "file_writeback 0\n" + "swapcached 0\n" + "anon_thp 269563723776\n" + "file_thp 0\n" + "shmem_thp 0\n" + "inactive_anon 274713391104\n" + "active_anon 57344\n" + "inactive_file 194953216\n" + "active_file 383688704\n" + "unevictable 0\n" + "slab_reclaimable 3674304\n" + "slab_unreclaimable 7674312\n" + "slab 11348616\n" + "workingset_refault_anon 0\n" + "workingset_refault_file 0\n" + "workingset_activate_anon 0\n" + "workingset_activate_file 0\n" + "workingset_restore_anon 0\n" + "workingset_restore_file 0\n" + "workingset_nodereclaim 0\n" + "pgscan 0\n" + "pgsteal 0\n" + "pgscan_kswapd 0\n" + "pgscan_direct 0\n" + "pgsteal_kswapd 0\n" + "pgsteal_direct 0\n" + "pgfault 147931033\n" + "pgmajfault 0\n" + "pgrefill 0\n" + "pgactivate 490211\n" + "pgdeactivate 0\n" + "pglazyfree 0\n" + "pglazyfreed 0\n" + "zswpin 0\n" + "zswpout 0\n" + "thp_fault_alloc 547392\n" + "thp_collapse_alloc 0\n"; + checkMemoryUsage(content, 274713448448); } TEST_F(LinuxMemoryCheckerTest, hostMachineInfo) { - // Testing host machine info /proc/meminfo when None is specified for stat - // file. - memChecker.setStatFile("None"); - ASSERT_GT(memChecker.getUsedMemory(), 0); + // Testing host machine info /proc/meminfo for tracking current system memory + // usage. + ASSERT_GT(memChecker_.getUsedMemory(), 0); } } // namespace facebook::presto diff --git a/presto-native-execution/presto_cpp/main/tests/examples/cgroupV1memory.stat b/presto-native-execution/presto_cpp/main/tests/examples/cgroupV1memory.stat deleted file mode 100644 index 0119c0051b21a..0000000000000 --- a/presto-native-execution/presto_cpp/main/tests/examples/cgroupV1memory.stat +++ /dev/null @@ -1,33 +0,0 @@ -cache 39313408 -rss 3600384 -rss_huge 0 -shmem 1757184 -mapped_file 12705792 -dirty 0 -writeback 0 -pgpgin 97614 -pgpgout 87091 -pgfault 55869 -pgmajfault 132 -inactive_anon 1486848 -active_anon 3649536 -inactive_file 20410368 -active_file 11894784 -unevictable 5406720 -hierarchical_memory_limit 9223372036854771712 -total_cache 239964160 -total_rss 109146112 -total_rss_huge 0 -total_shmem 2420736 -total_mapped_file 84344832 -total_dirty 135168 -total_writeback 0 -total_pgpgin 291606 -total_pgpgout 205533 -total_pgfault 296666 -total_pgmajfault 792 -total_inactive_anon 1486848 -total_active_anon 101105664 -total_inactive_file 165715968 -total_active_file 65421312 -total_unevictable 18653184 diff --git a/presto-native-execution/presto_cpp/main/tests/examples/cgroupV2memory.stat b/presto-native-execution/presto_cpp/main/tests/examples/cgroupV2memory.stat deleted file mode 100644 index 5833528757b39..0000000000000 --- a/presto-native-execution/presto_cpp/main/tests/examples/cgroupV2memory.stat +++ /dev/null @@ -1,50 +0,0 @@ -anon 274528108544 -file 578768896 -kernel 565014528 -kernel_stack 9388032 -pagetables 543928320 -percpu 14040 -sock 102400 -vmalloc 86016 -shmem 0 -zswap 0 -zswapped 0 -file_mapped 0 -file_dirty 1142784 -file_writeback 0 -swapcached 0 -anon_thp 269563723776 -file_thp 0 -shmem_thp 0 -inactive_anon 274713391104 -active_anon 57344 -inactive_file 194953216 -active_file 383688704 -unevictable 0 -slab_reclaimable 3674304 -slab_unreclaimable 7674312 -slab 11348616 -workingset_refault_anon 0 -workingset_refault_file 0 -workingset_activate_anon 0 -workingset_activate_file 0 -workingset_restore_anon 0 -workingset_restore_file 0 -workingset_nodereclaim 0 -pgscan 0 -pgsteal 0 -pgscan_kswapd 0 -pgscan_direct 0 -pgsteal_kswapd 0 -pgsteal_direct 0 -pgfault 147931033 -pgmajfault 0 -pgrefill 0 -pgactivate 490211 -pgdeactivate 0 -pglazyfree 0 -pglazyfreed 0 -zswpin 0 -zswpout 0 -thp_fault_alloc 547392 -thp_collapse_alloc 0