From 2bb306db224b141897100913ff7a03111851388d Mon Sep 17 00:00:00 2001
From: Jamie Liu <jamieliu@google.com>
Date: Mon, 11 Nov 2024 22:54:37 -0800
Subject: [PATCH] kernel: start RLIMIT_CPU timers in NewThreadGroup

Before cl/695198313, this bug affected only RLIMIT_CPU soft limits, which
were represented by tg.rlimitCPUSoftSetting, a field that was likewise
left uninitialized by Kernel.NewThreadGroup(); hard limits were unaffected
because the CPU clock ticker fetched the RLIMIT_CPU hard limit on each
tick. After cl/695198313, this bug affects both RLIMIT_CPU soft and hard
limits.

Itimers don't have the same issue since they're not preserved across
fork().

PiperOrigin-RevId: 695605349
---
 pkg/sentry/kernel/task_acct.go    | 20 ++++++++------
 pkg/sentry/kernel/thread_group.go |  1 +
 test/syscalls/linux/timers.cc     | 63 +++++++++++++++++++++++++++++--
 3 files changed, 73 insertions(+), 11 deletions(-)

diff --git a/pkg/sentry/kernel/task_acct.go b/pkg/sentry/kernel/task_acct.go
index f377015146..24319692d8 100644
--- a/pkg/sentry/kernel/task_acct.go
+++ b/pkg/sentry/kernel/task_acct.go
@@ -90,18 +90,22 @@ func (t *Task) Setitimer(id int32, newitv linux.ItimerVal) (linux.ItimerVal, err
 //
 // Preconditions: The caller must be running on the task goroutine.
 func (t *Task) NotifyRlimitCPUUpdated() {
-	// Lock t.tg.timerMu to synchronize updates to these timers between tasks
-	// in t.tg.
-	t.tg.timerMu.Lock()
-	defer t.tg.timerMu.Unlock()
-	rlimitCPU := t.tg.limits.Get(limits.CPU)
-	t.tg.appSysCPUClockLast.Store(t)
-	t.tg.rlimitCPUSoftTimer.Set(ktime.Setting{
+	t.tg.notifyRlimitCPUUpdated(t)
+}
+
+func (tg *ThreadGroup) notifyRlimitCPUUpdated(t *Task) {
+	// Lock tg.timerMu to synchronize updates to these timers between tasks in
+	// tg.
+	tg.timerMu.Lock()
+	defer tg.timerMu.Unlock()
+	rlimitCPU := tg.limits.Get(limits.CPU)
+	tg.appSysCPUClockLast.Store(t)
+	tg.rlimitCPUSoftTimer.Set(ktime.Setting{
 		Enabled: rlimitCPU.Cur != limits.Infinity,
 		Next:    ktime.FromSeconds(int64(min(rlimitCPU.Cur, math.MaxInt64))),
 		Period:  time.Second,
 	}, nil)
-	t.tg.rlimitCPUHardTimer.Set(ktime.Setting{
+	tg.rlimitCPUHardTimer.Set(ktime.Setting{
 		Enabled: rlimitCPU.Max != limits.Infinity,
 		Next:    ktime.FromSeconds(int64(min(rlimitCPU.Max, math.MaxInt64))),
 	}, nil)
diff --git a/pkg/sentry/kernel/thread_group.go b/pkg/sentry/kernel/thread_group.go
index 26566de04f..c06485dacb 100644
--- a/pkg/sentry/kernel/thread_group.go
+++ b/pkg/sentry/kernel/thread_group.go
@@ -305,6 +305,7 @@ func (k *Kernel) NewThreadGroup(pidns *PIDNamespace, sh *SignalHandlers, termina
 	tg.rlimitCPUSoftListener.tg = tg
 	tg.rlimitCPUHardTimer.Init(&tg.appSysCPUClock, &tg.rlimitCPUHardListener)
 	tg.rlimitCPUHardListener.tg = tg
+	tg.notifyRlimitCPUUpdated(nil)
 	tg.oldRSeqCritical.Store(&OldRSeqCriticalRegion{})
 	return tg
 }
diff --git a/test/syscalls/linux/timers.cc b/test/syscalls/linux/timers.cc
index bc12dd4af6..70efe88ac6 100644
--- a/test/syscalls/linux/timers.cc
+++ b/test/syscalls/linux/timers.cc
@@ -50,13 +50,15 @@ namespace {
 #define CPUCLOCK_PROF 0
 #endif  // CPUCLOCK_PROF
 
-PosixErrorOr<absl::Duration> ProcessCPUTime(pid_t pid) {
+clockid_t ProcessCPUClock(pid_t pid) {
   // Use pid-specific CPUCLOCK_PROF, which is the clock used to enforce
   // RLIMIT_CPU.
-  clockid_t clockid = (~static_cast<clockid_t>(pid) << 3) | CPUCLOCK_PROF;
+  return (~static_cast<clockid_t>(pid) << 3) | CPUCLOCK_PROF;
+}
 
+PosixErrorOr<absl::Duration> ProcessCPUTime(pid_t pid) {
   struct timespec ts;
-  int ret = clock_gettime(clockid, &ts);
+  int ret = clock_gettime(ProcessCPUClock(pid), &ts);
   if (ret < 0) {
     return PosixError(errno, "clock_gettime failed");
   }
@@ -223,6 +225,61 @@ TEST(TimerTest, ProcessKilledOnCPUHardLimit) {
   EXPECT_GE(cpu, kHardLimit);
 }
 
+TEST(TimerTest, RlimitCpuInherited) {
+  pid_t child_pid = fork();
+  MaybeSave();
+  if (child_pid == 0) {
+    // Ignore SIGXCPU from the RLIMIT_CPU soft limit.
+    struct sigaction new_action;
+    new_action.sa_handler = NoopSignalHandler;
+    new_action.sa_flags = 0;
+    sigemptyset(&new_action.sa_mask);
+    TEST_PCHECK(sigaction(SIGXCPU, &new_action, nullptr) == 0);
+
+    // Set both soft and hard limits to expire a short time from now. (Since we
+    // may not be able to raise RLIMIT_CPU again, this must happen in a
+    // disposable child of the test process.)
+    constexpr int kDelaySeconds = 2;
+    struct timespec ts;
+    TEST_PCHECK(clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts) == 0);
+    struct rlimit cpu_limits;
+    // +1 to round up, presuming that ts.tv_nsec > 0.
+    cpu_limits.rlim_cur = cpu_limits.rlim_max = ts.tv_sec + kDelaySeconds + 1;
+    TEST_PCHECK(setrlimit(RLIMIT_CPU, &cpu_limits) == 0);
+    MaybeSave();
+
+    pid_t grandchild_pid = fork();
+    MaybeSave();
+    if (grandchild_pid == 0) {
+      // Burn CPU.
+      for (;;) {
+        int x = 0;
+        benchmark::DoNotOptimize(x);  // Don't optimize this loop away.
+      }
+    }
+    TEST_PCHECK(grandchild_pid > 0);
+
+    // Wait for the grandchild to exit, but do not reap it. This will allow us
+    // to check its CPU usage while it is zombied.
+    TEST_PCHECK(waitid(P_PID, grandchild_pid, nullptr, WEXITED | WNOWAIT) == 0);
+    MaybeSave();
+    TEST_PCHECK(clock_gettime(ProcessCPUClock(grandchild_pid), &ts) == 0);
+    TEST_CHECK(ts.tv_sec >= cpu_limits.rlim_max);
+    // Reap the grandchild and check that it was SIGKILLed by the RLIMIT_CPU
+    // hard limit.
+    int status;
+    TEST_PCHECK(waitpid(grandchild_pid, &status, 0) == grandchild_pid);
+    TEST_CHECK(WIFSIGNALED(status) && (WTERMSIG(status) == SIGKILL));
+    _exit(0);
+  }
+
+  int status;
+  ASSERT_THAT(waitpid(child_pid, &status, 0),
+              SyscallSucceedsWithValue(child_pid));
+  EXPECT_TRUE(WIFEXITED(status) && (WEXITSTATUS(status) == 0))
+      << "status = " << status;
+}
+
 // See timerfd.cc:TimerSlack() for rationale.
 constexpr absl::Duration kTimerSlack = absl::Milliseconds(500);
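
For context, the fork-inheritance behavior that the new RlimitCpuInherited
test relies on can be demonstrated outside the test harness with a minimal
standalone C++ program. The sketch below is illustrative only and is not
part of the patch; the 1-second soft limit and 2-second hard limit are
assumed values chosen for the demo:

// Standalone sketch (illustrative, not part of the patch): shows that
// RLIMIT_CPU is inherited across fork(), that the soft limit raises
// SIGXCPU, and that the hard limit delivers SIGKILL.
#include <signal.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <unistd.h>

int main() {
  // Soft limit of 1s (SIGXCPU), hard limit of 2s (SIGKILL). Both are
  // inherited by the child forked below, since rlimits are preserved
  // across fork().
  struct rlimit cpu_limits;
  cpu_limits.rlim_cur = 1;
  cpu_limits.rlim_max = 2;
  if (setrlimit(RLIMIT_CPU, &cpu_limits) != 0) {
    perror("setrlimit");
    return 1;
  }

  pid_t child_pid = fork();
  if (child_pid == 0) {
    // Child: ignore the soft-limit SIGXCPU so that only the hard limit
    // terminates us, then burn CPU until the kernel sends SIGKILL.
    // RLIMIT_CPU is charged against each process's own CPU clock, so the
    // child's usage starts from zero despite the inherited limit.
    signal(SIGXCPU, SIG_IGN);
    volatile unsigned long spin = 0;
    for (;;) {
      spin++;
    }
  }

  int status;
  if (waitpid(child_pid, &status, 0) != child_pid) {
    perror("waitpid");
    return 1;
  }
  if (WIFSIGNALED(status) && WTERMSIG(status) == SIGKILL) {
    printf("child inherited RLIMIT_CPU and was SIGKILLed at the hard limit\n");
    return 0;
  }
  fprintf(stderr, "unexpected child wait status: %d\n", status);
  return 1;
}

As in the test, SIGXCPU from the soft limit is ignored so that only the hard
limit's SIGKILL terminates the child. Because rlimits, unlike itimers, are
preserved across fork(), the child is killed even though it never calls
setrlimit() itself.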