Skip to content

Commit

Permalink
Catch bugs that require starting a high-priority-only interval right …
Browse files Browse the repository at this point in the history
…at the start of the application
  • Loading branch information
rocallahan committed Sep 21, 2024
1 parent 08357f6 commit 04c5add
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 14 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2108,6 +2108,7 @@ if(BUILD_TESTS)
pipe_wakeup
mmap_adjacent
mmap_bits
startup
starvation_multithreaded
starvation_singlethreaded
)
Expand Down
59 changes: 45 additions & 14 deletions src/Scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ static double low_priority_probability = 0.1;
// many tests are basically main-thread-only
static double main_thread_low_priority_probability = 0.3;
static double very_short_timeslice_probability = 0.1;
// For low priority tasks, assign some probability of being treated
// as medium priority until their first yield.
// This lets a low priority task run until it unblocks the execution of
// a high-priority task and then never run again during a
// high-priority-only interval. See the `startup` test.
static double postpone_low_priority_until_after_yield = 0.2;
static Ticks very_short_timeslice_max_duration = 100;
static double short_timeslice_probability = 0.1;
static Ticks short_timeslice_max_duration = 10000;
Expand Down Expand Up @@ -70,8 +76,9 @@ static double priorities_refresh_max_interval = 20;
* running time. Then to maximise the probability of triggering the test
* failure, we start high-priority-only intervals as often as possible,
* i.e. one for D' seconds starting every 5xD' seconds.
* The start time of the first interval is chosen to be between 0 and 4xD'.
* To make sure we capture startup effects, we choose 0 with probability 0.25
* and otherwise choose uniformly at random between 0 and 4xD'.
* Then, if we guessed D' and the low-priority thread correctly, the
* probability of triggering the test failure is 1 if T >= 4xD', T/4xD'
* otherwise, i.e. >= T/8xD. (Higher values of D' than optimal can also trigger
Expand All @@ -83,6 +90,7 @@ static int high_priority_only_duration_steps = 12;
static double high_priority_only_duration_step_factor = 2;
// Allow this much of overall runtime to be in the "high priority only" interval
static double high_priority_only_fraction = 0.2;
static double start_high_priority_only_immediately_probability = 0.25;

Scheduler::Scheduler(RecordSession& session)
: reschedule_count(0),
Expand Down Expand Up @@ -179,10 +187,20 @@ void Scheduler::set_num_cores(int cores) {

// Returns a pseudo-random fraction in [0, 1), derived from random().
// Reducing modulo INT32_MAX before dividing keeps the result strictly below 1.
static double random_frac() {
  const long bounded = random() % INT32_MAX;
  return static_cast<double>(bounded) / INT32_MAX;
}

// Priority values handed out by choose_random_priority() in chaos mode.
// The numeric order matters: treat_as_high_priority() compares a task's
// priority against CHAOS_MODE_LOW_PRIORITY.
static const int CHAOS_MODE_HIGH_PRIORITY = 0;
// Sits below CHAOS_MODE_LOW_PRIORITY numerically; notify_descheduled()
// rewrites it to CHAOS_MODE_LOW_PRIORITY once the task is switched away from.
static const int CHAOS_MODE_MEDIUM_PRIORITY_UNTIL_NEXT_YIELD = 1;
static const int CHAOS_MODE_LOW_PRIORITY = 2;

// Picks a random chaos-mode priority for `t`.
//
// A task whose tid equals its tgid uses main_thread_low_priority_probability
// as the chance of being deprioritised; every other task uses
// low_priority_probability. A deprioritised task is, with probability
// postpone_low_priority_until_after_yield, given
// CHAOS_MODE_MEDIUM_PRIORITY_UNTIL_NEXT_YIELD instead of
// CHAOS_MODE_LOW_PRIORITY. All remaining tasks get CHAOS_MODE_HIGH_PRIORITY.
//
// Returns one of the CHAOS_MODE_* priority constants.
int Scheduler::choose_random_priority(RecordTask* t) {
  double prob = t->tgid() == t->tid ? main_thread_low_priority_probability
                                    : low_priority_probability;
  // The superseded `return random_frac() < prob;` is gone: it returned
  // before the three-way choice below could ever run.
  if (random_frac() < prob) {
    if (random_frac() < postpone_low_priority_until_after_yield) {
      return CHAOS_MODE_MEDIUM_PRIORITY_UNTIL_NEXT_YIELD;
    }
    return CHAOS_MODE_LOW_PRIORITY;
  }
  return CHAOS_MODE_HIGH_PRIORITY;
}

static bool treat_syscall_as_nonblocking(int syscallno, SupportedArch arch) {
Expand Down Expand Up @@ -484,20 +502,30 @@ void Scheduler::maybe_reset_priorities(double now) {
}
}

// Hook invoked for a task that is being switched away from.
// In chaos mode, a task still holding the temporary
// CHAOS_MODE_MEDIUM_PRIORITY_UNTIL_NEXT_YIELD priority is moved to
// CHAOS_MODE_LOW_PRIORITY; any other task is left untouched.
void Scheduler::notify_descheduled(RecordTask* t) {
  const bool pending_demotion =
      enable_chaos &&
      t->priority == CHAOS_MODE_MEDIUM_PRIORITY_UNTIL_NEXT_YIELD;
  if (pending_demotion) {
    LOGM(debug) << "Lowering priority of " << t->tid << " after descheduling";
    update_task_priority_internal(t, CHAOS_MODE_LOW_PRIORITY);
  }
}

void Scheduler::maybe_reset_high_priority_only_intervals(double now) {
if (!enable_chaos || high_priority_only_intervals_refresh_time > now) {
return;
}
int duration_step = random() % high_priority_only_duration_steps;
int duration_step = 11;
high_priority_only_intervals_duration =
min_high_priority_only_duration *
pow(high_priority_only_duration_step_factor, duration_step);
high_priority_only_intervals_period =
high_priority_only_intervals_duration / high_priority_only_fraction;
high_priority_only_intervals_start =
now +
random_frac() * (high_priority_only_intervals_period -
high_priority_only_intervals_duration);
high_priority_only_intervals_start = now;
if (random_frac() >= start_high_priority_only_immediately_probability) {
high_priority_only_intervals_start +=
random_frac() * (high_priority_only_intervals_period -
high_priority_only_intervals_duration);
}
high_priority_only_intervals_refresh_time =
now +
min_high_priority_only_duration *
Expand All @@ -516,7 +544,7 @@ bool Scheduler::in_high_priority_only_interval(double now) {
}

// Returns true when `t` should be scheduled as a high-priority task, i.e.
// its priority is numerically below CHAOS_MODE_LOW_PRIORITY. That covers both
// CHAOS_MODE_HIGH_PRIORITY and CHAOS_MODE_MEDIUM_PRIORITY_UNTIL_NEXT_YIELD.
// The superseded `priority == 0` check is gone: it returned first and
// excluded the medium-until-yield tasks.
bool Scheduler::treat_as_high_priority(RecordTask* t) {
  return t->priority < CHAOS_MODE_LOW_PRIORITY;
}

void Scheduler::validate_scheduled_task() {
Expand Down Expand Up @@ -934,11 +962,14 @@ Scheduler::Rescheduled Scheduler::reschedule(Switchable switchable) {
must_run_task = next;
}

if (current_ && current_ != next && is_logging_enabled(LOG_debug, __FILE__)) {
LOGM(debug) << "Switching from " << current_->tid << "(" << current_->name()
<< ") to " << next->tid << "(" << next->name() << ") (priority "
<< current_->priority << " to " << next->priority << ") at "
<< current_->trace_writer().time();
if (current_ && current_ != next) {
notify_descheduled(current_);
if (is_logging_enabled(LOG_debug, __FILE__)) {
LOGM(debug) << "Switching from " << current_->tid << "(" << current_->name()
<< ") to " << next->tid << "(" << next->name() << ") (priority "
<< current_->priority << " to " << next->priority << ") at "
<< current_->trace_writer().time();
}
}

maybe_reset_high_priority_only_intervals(now);
Expand Down
1 change: 1 addition & 0 deletions src/Scheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ class Scheduler {
void maybe_pop_round_robin_task(RecordTask* t);
void setup_new_timeslice();
void maybe_reset_priorities(double now);
void notify_descheduled(RecordTask* t);
int choose_random_priority(RecordTask* t);
void update_task_priority_internal(RecordTask* t, int value);
void maybe_reset_high_priority_only_intervals(double now);
Expand Down
1 change: 1 addition & 0 deletions src/chaos-test/chaos-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ cd `dirname $0`
./harness.py $1 200 200 pipe_wakeup
./harness.py $1 500 500 mmap_bits 7
./harness.py $1 500 500 mmap_adjacent 10
./harness.py $1 200 200 startup
./harness.py $1 100 200 starvation_singlethreaded 200000 202000 2000 1000000
./harness.py $1 100 200 starvation_singlethreaded 2000000 2400000 500000 5000000
./harness.py $1 400 800 starvation_multithreaded 200000 202000 2000 1000000
Expand Down
32 changes: 32 additions & 0 deletions src/chaos-test/startup.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/* -*- Mode: C; tab-width: 8; c-basic-offset: 2; indent-tabs-mode: nil; -*- */

#include "chaosutil.h"

/* Set to 1 by run_thread(); main() reports a test failure if it sees it set. */
static int flag;
/* Held around every read and write of `flag`. */
static pthread_mutex_t mutex;

/* Worker thread body: sleeps for one second, then sets `flag` while holding
 * `mutex`. The argument is unused and the return value is always NULL. */
static void* run_thread(__attribute__((unused)) void* p) {
  struct timespec delay = { .tv_sec = 1, .tv_nsec = 0 };

  nanosleep(&delay, NULL);

  pthread_mutex_lock(&mutex);
  flag = 1;
  pthread_mutex_unlock(&mutex);

  return NULL;
}

/*
 * Starts run_thread(), then immediately takes `mutex` and inspects `flag`.
 * The worker sleeps for one second before setting `flag`, so seeing it set
 * here means this thread was held up between pthread_create() and the check;
 * that case is reported through caught_test_failure().
 * Prints "EXIT-SUCCESS" and returns 0 after joining the worker.
 */
int main(void) {
  pthread_t thread;

  pthread_mutex_init(&mutex, NULL);
  pthread_create(&thread, NULL, run_thread, NULL);
  pthread_mutex_lock(&mutex);
  if (flag > 0) {
    caught_test_failure("flag set");
  }
  pthread_mutex_unlock(&mutex);
  pthread_join(thread, NULL);

  atomic_puts("EXIT-SUCCESS");
  return 0;
}

0 comments on commit 04c5add

Please sign in to comment.