Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add not-in-use computing prefs #4871

Merged
merged 23 commits into from
Sep 21, 2022
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
3b1ebfd
Add new "not in use" (niu) prefs
davidpanderson Aug 2, 2022
3ce9291
change "Tasks" to "General"
davidpanderson Aug 6, 2022
50f56c5
Manager: restructure compute prefs; add not-in-use prefs
davidpanderson Aug 6, 2022
b6570bc
Manager: change advanced prefs dialog to include not-in-use prefs
davidpanderson Aug 7, 2022
5789724
Manager: add checkbox for "suspend if no recent input pref".
davidpanderson Aug 7, 2022
51bfc0a
client: if new not-in-use prefs aren't specified in XML file,
davidpanderson Aug 7, 2022
7d24c02
Client and Manager: debug computing prefs.
davidpanderson Aug 7, 2022
588ab8f
Web: in computing prefs, move swap limit to Disk section
davidpanderson Aug 7, 2022
7fe3166
Manager: gcc compile fix
davidpanderson Aug 7, 2022
c050459
sim: fix build error
davidpanderson Aug 7, 2022
70285e2
web: add "requires BOINC 7.20.3+" for new prefs
davidpanderson Aug 7, 2022
d8e6d4e
client: print global prefs more completely (e.g. show not-in-use prefs)
davidpanderson Aug 10, 2022
3b73baa
client: put - at start of leading whitespace in messages, else it get…
davidpanderson Aug 10, 2022
5df4b27
Manager: shorten prefs dialog
davidpanderson Aug 12, 2022
482da2b
Disable other "while in use" options if "Suspend when computer is in …
Aug 14, 2022
73ec053
Add missing xgettext flag for non-format use of "%" symbol
Aug 14, 2022
22aa492
Fix typo in method names
Aug 14, 2022
27aff32
disable feature
davidpanderson Aug 14, 2022
ca48a69
Merge branch 'dpa_niu_prefs' of https://github.com/BOINC/boinc into d…
davidpanderson Aug 14, 2022
9ee282f
web: if not-in-use prefs undefined, use in-use values
davidpanderson Sep 1, 2022
e5367f8
Merge branch 'dpa_niu_prefs' of https://github.com/BOINC/boinc into d…
davidpanderson Sep 1, 2022
28e9114
client and manager: when parsing NIU prefs, immediately change 0 to 100.
davidpanderson Sep 5, 2022
f627b09
Manager: fix handling when disabled items are enabled
Sep 6, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions client/app.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -657,9 +657,9 @@ int ACTIVE_TASK::get_free_slot(RESULT* rp) {

// paranoia - don't allow unbounded slots
//
if (j > gstate.ncpus*100) {
if (j > gstate.n_usable_cpus*100) {
msg_printf(rp->project, MSG_INTERNAL_ERROR,
"exceeded limit of %d slot directories", gstate.ncpus*100
"exceeded limit of %d slot directories", gstate.n_usable_cpus*100
);
return ERR_NULL;
}
Expand Down Expand Up @@ -1205,16 +1205,14 @@ void* throttler(void*) {

while (1) {
client_mutex.lock();
if (gstate.tasks_suspended
|| gstate.global_prefs.cpu_usage_limit > 99
|| gstate.global_prefs.cpu_usage_limit < 0.005
) {
double limit = gstate.current_cpu_usage_limit();
if (gstate.tasks_suspended || limit == 0) {
client_mutex.unlock();
// ::Sleep((int)(1000*10)); // for Win debugging
boinc_sleep(10);
continue;
}
double on, off, on_frac = gstate.global_prefs.cpu_usage_limit / 100;
double on, off, on_frac = limit / 100;
#if 0
// sub-second CPU throttling
// DOESN'T WORK BECAUSE OF 1-SEC API POLL
Expand Down
2 changes: 1 addition & 1 deletion client/app_control.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1546,7 +1546,7 @@ void ACTIVE_TASK_SET::get_msgs() {
last_time = gstate.now;

double et_diff = delta_t;
double et_diff_throttle = delta_t * gstate.global_prefs.cpu_usage_limit/100;
double et_diff_throttle = delta_t * gstate.current_cpu_usage_limit()/100;

for (i=0; i<active_tasks.size(); i++) {
atp = active_tasks[i];
Expand Down
10 changes: 6 additions & 4 deletions client/client_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ CLIENT_STATE::CLIENT_STATE()
redirect_io = false;
disable_graphics = false;
cant_write_state_file = false;
ncpus = 1;
n_usable_cpus = 1;
benchmarks_running = false;
client_disk_usage = 0.0;
total_disk_usage = 0.0;
Expand Down Expand Up @@ -202,8 +202,8 @@ void CLIENT_STATE::show_host_info() {
"Processor: %d %s %s",
host_info.p_ncpus, host_info.p_vendor, host_info.p_model
);
if (ncpus != host_info.p_ncpus) {
msg_printf(NULL, MSG_INFO, "Using %d CPUs", ncpus);
if (n_usable_cpus != host_info.p_ncpus) {
msg_printf(NULL, MSG_INFO, "Using %d CPUs", n_usable_cpus);
}
#if 0
if (host_info.m_cache > 0) {
Expand Down Expand Up @@ -626,7 +626,7 @@ int CLIENT_STATE::init() {
//
host_info.p_vm_extensions_disabled = false;

set_ncpus();
set_n_usable_cpus();
show_host_info();

// this follows parse_state_file() because that's where we read project names
Expand Down Expand Up @@ -992,6 +992,8 @@ bool CLIENT_STATE::poll_slow_events() {
#endif

if (user_active != old_user_active) {
set_n_usable_cpus();
// if niu_max_ncpus_pct pref is set, # usable CPUs may change
request_schedule_cpus(user_active?"Not idle":"Idle");
}

Expand Down
34 changes: 24 additions & 10 deletions client/client_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#ifndef BOINC_CLIENT_STATE_H
#define BOINC_CLIENT_STATE_H

#define NEW_CPU_THROTTLE
// do CPU throttling using a separate thread.
// This makes it possible to throttle faster than the client's 1-sec poll period
// NOTE: we can't actually do this because the runtime system's
Expand All @@ -35,9 +34,7 @@ using std::vector;

#include "coproc.h"
#include "util.h"
#ifdef NEW_CPU_THROTTLE
#include "thread.h"
#endif

#include "acct_mgr.h"
#include "acct_setup.h"
Expand Down Expand Up @@ -345,7 +342,7 @@ struct CLIENT_STATE {
// - an app fails to start (CS::schedule_cpus())
// - any project op is done via RPC (suspend/resume)
// - any result op is done via RPC (suspend/resume)
void set_ncpus();
void set_n_usable_cpus();

// --------------- cs_account.cpp:
int add_project(
Expand All @@ -363,12 +360,13 @@ struct CLIENT_STATE {
double get_fraction_done(RESULT* result);
int input_files_available(RESULT*, bool, FILE_INFO** f=0);
ACTIVE_TASK* lookup_active_task_by_result(RESULT*);
int ncpus;
// Act like there are this many CPUs.
int n_usable_cpus;
// number of usable CPUs
// By default this is the # of physical CPUs,
// but it can be changed in two ways:
// - type <ncpus>N</ncpus> in the config file
// - type the max_ncpus_pct pref
// - <ncpus>N</ncpus> in cc_config.xml
// (for debugging; can be > # physical CPUs)
// - the max_ncpus_pct and niu_max_ncpus_pct prefs

int latest_version(APP*, char*);
int app_finished(ACTIVE_TASK&);
Expand Down Expand Up @@ -521,6 +519,24 @@ struct CLIENT_STATE {
#endif

KEYWORDS keywords;

double current_cpu_usage_limit() {
davidpanderson marked this conversation as resolved.
Show resolved Hide resolved
double x = global_prefs.cpu_usage_limit;
if (!user_active && global_prefs.niu_cpu_usage_limit>=0) {
x = global_prefs.niu_cpu_usage_limit;
}
if (x < 0.005 || x > 99) {
davidpanderson marked this conversation as resolved.
Show resolved Hide resolved
x = 100;
}
return x;
}
double current_suspend_cpu_usage() {
davidpanderson marked this conversation as resolved.
Show resolved Hide resolved
double x = global_prefs.suspend_cpu_usage;
if (!user_active && global_prefs.niu_suspend_cpu_usage>=0) {
x = global_prefs.niu_suspend_cpu_usage;
}
return x;
}
};

extern CLIENT_STATE gstate;
Expand All @@ -535,10 +551,8 @@ extern double calculate_exponential_backoff(
int n, double MIN, double MAX
);

#ifdef NEW_CPU_THROTTLE
extern THREAD_LOCK client_mutex;
extern THREAD throttle_thread;
#endif

//////// TIME-RELATED CONSTANTS ////////////

Expand Down
56 changes: 31 additions & 25 deletions client/cpu_sched.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ struct PROC_RESOURCES {
COPROCS pr_coprocs;

void init() {
ncpus = gstate.ncpus;
ncpus = gstate.n_usable_cpus;
ncpus_used_st = 0;
ncpus_used_mt = 0;
pr_coprocs.clone(coprocs, false);
Expand Down Expand Up @@ -567,7 +567,7 @@ void CLIENT_STATE::reset_rec_accounting() {
//
static void update_rec() {
double f = gstate.host_info.p_fpops;
double on_frac = gstate.global_prefs.cpu_usage_limit / 100;
double on_frac = gstate.current_cpu_usage_limit() / 100;

for (unsigned int i=0; i<gstate.projects.size(); i++) {
PROJECT* p = gstate.projects[i];
Expand Down Expand Up @@ -628,7 +628,7 @@ double total_peak_flops() {
static double tpf;
if (first) {
first = false;
tpf = gstate.host_info.p_fpops * gstate.ncpus;
tpf = gstate.host_info.p_fpops * gstate.n_usable_cpus;
for (int i=1; i<coprocs.n_rsc; i++) {
COPROC& cp = coprocs.coprocs[i];
tpf += rsc_work_fetch[i].relative_speed * gstate.host_info.p_fpops * cp.count;
Expand Down Expand Up @@ -1013,7 +1013,7 @@ static void promote_multi_thread_jobs(vector<RESULT*>& runnable_jobs) {
vector<RESULT*>::iterator cur = runnable_jobs.begin();
while(1) {
if (cur == runnable_jobs.end()) break;
if (cpus_used >= gstate.ncpus) break;
if (cpus_used >= gstate.n_usable_cpus) break;
RESULT* rp = *cur;
if (rp->rr_sim_misses_deadline) break;
double nc = rp->avp->avg_ncpus;
Expand Down Expand Up @@ -1251,9 +1251,9 @@ bool CLIENT_STATE::enforce_run_list(vector<RESULT*>& run_list) {
// don't allow additional CPU jobs;
// allow coproc jobs if the resulting CPU load is at most ncpus+1
//
if (ncpus_used >= ncpus) {
if (ncpus_used >= n_usable_cpus) {
if (rp->uses_coprocs()) {
if (ncpus_used + rp->avp->avg_ncpus > ncpus+1) {
if (ncpus_used + rp->avp->avg_ncpus > n_usable_cpus+1) {
if (log_flags.cpu_sched_debug) {
msg_printf(rp->project, MSG_INFO,
"[cpu_sched_debug] skipping GPU job %s; CPU committed",
Expand All @@ -1266,7 +1266,7 @@ bool CLIENT_STATE::enforce_run_list(vector<RESULT*>& run_list) {
if (log_flags.cpu_sched_debug) {
msg_printf(rp->project, MSG_INFO,
"[cpu_sched_debug] all CPUs used (%.2f >= %d), skipping %s",
ncpus_used, ncpus,
ncpus_used, n_usable_cpus,
rp->name
);
}
Expand Down Expand Up @@ -1350,11 +1350,11 @@ bool CLIENT_STATE::enforce_run_list(vector<RESULT*>& run_list) {
}
}

if (log_flags.cpu_sched_debug && ncpus_used < ncpus) {
if (log_flags.cpu_sched_debug && ncpus_used < n_usable_cpus) {
msg_printf(0, MSG_INFO, "[cpu_sched_debug] using %.2f out of %d CPUs",
ncpus_used, ncpus
ncpus_used, n_usable_cpus
);
if (ncpus_used < ncpus) {
if (ncpus_used < n_usable_cpus) {
request_work_fetch("CPUs idle");
}
}
Expand Down Expand Up @@ -1622,12 +1622,14 @@ ACTIVE_TASK* CLIENT_STATE::get_task(RESULT* rp) {
return atp;
}

// called at startup (after get_host_info())
// and when general prefs have been parsed.
// NOTE: GSTATE.NCPUS MUST BE 1 OR MORE; WE DIVIDE BY IT IN A COUPLE OF PLACES
// called:
// - at startup (after get_host_info())
// - when general prefs have been parsed
// - when user_active changes
// NOTE: n_usable_cpus MUST BE 1 OR MORE; WE DIVIDE BY IT IN A COUPLE OF PLACES
//
void CLIENT_STATE::set_ncpus() {
int ncpus_old = ncpus;
void CLIENT_STATE::set_n_usable_cpus() {
int ncpus_old = n_usable_cpus;

// config file can say to act like host has N CPUs
//
Expand All @@ -1638,25 +1640,29 @@ void CLIENT_STATE::set_ncpus() {
first = false;
}
if (cc_config.ncpus>0) {
ncpus = cc_config.ncpus;
host_info.p_ncpus = ncpus; // use this in scheduler requests
n_usable_cpus = cc_config.ncpus;
host_info.p_ncpus = n_usable_cpus; // use this in scheduler requests
} else {
host_info.p_ncpus = original_p_ncpus;
ncpus = host_info.p_ncpus;
n_usable_cpus = host_info.p_ncpus;
}
if (ncpus <= 0) {
ncpus = 1; // shouldn't happen

double p = global_prefs.max_ncpus_pct;
if (!user_active && global_prefs.niu_max_ncpus_pct>=0) {
p = global_prefs.niu_max_ncpus_pct;
}
if (p) {
n_usable_cpus = (int)((n_usable_cpus * p)/100);
}

if (global_prefs.max_ncpus_pct) {
ncpus = (int)((ncpus * global_prefs.max_ncpus_pct)/100);
if (ncpus == 0) ncpus = 1;
if (n_usable_cpus <= 0) {
n_usable_cpus = 1;
}

if (initialized && ncpus != ncpus_old) {
if (initialized && n_usable_cpus != ncpus_old) {
msg_printf(0, MSG_INFO,
"Number of usable CPUs has changed from %d to %d.",
ncpus_old, ncpus
ncpus_old, n_usable_cpus
);
request_schedule_cpus("Number of usable CPUs has changed");
request_work_fetch("Number of usable CPUs has changed");
Expand Down
4 changes: 2 additions & 2 deletions client/cs_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,9 @@ void CLIENT_STATE::start_cpu_benchmarks(bool force) {
cpu_benchmarks_start = dtime();

benchmark_descs.clear();
benchmark_descs.resize(ncpus);
benchmark_descs.resize(n_usable_cpus);

bm_ncpus = ncpus;
bm_ncpus = n_usable_cpus;
benchmarks_running = true;

for (i=0; i<bm_ncpus; i++) {
Expand Down
36 changes: 11 additions & 25 deletions client/cs_prefs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,11 +253,11 @@ int CLIENT_STATE::check_suspend_processing() {
// if we suspended because of CPU usage,
// don't unsuspend for at least 2*MEMORY_USAGE_PERIOD
//
if (global_prefs.suspend_cpu_usage) {
if (current_suspend_cpu_usage()) {
if (now < last_cpu_usage_suspend+2*MEMORY_USAGE_PERIOD) {
return SUSPEND_REASON_CPU_USAGE;
}
if (non_boinc_cpu_usage*100 > global_prefs.suspend_cpu_usage) {
if (non_boinc_cpu_usage*100 > current_suspend_cpu_usage()) {
last_cpu_usage_suspend = now;
return SUSPEND_REASON_CPU_USAGE;
}
Expand Down Expand Up @@ -301,26 +301,6 @@ int CLIENT_STATE::check_suspend_processing() {
}
#endif

#ifndef NEW_CPU_THROTTLE
// CPU throttling.
// Do this check last; that way if suspend_reason is CPU_THROTTLE,
// the GUI knows there's no other source of suspension
//
if (global_prefs.cpu_usage_limit < 99) { // round-off?
static double last_time=0, debt=0;
double diff = now - last_time;
last_time = now;
if (diff >= POLL_INTERVAL/2. && diff < POLL_INTERVAL*10.) {
debt += diff*global_prefs.cpu_usage_limit/100;
if (debt < 0) {
return SUSPEND_REASON_CPU_THROTTLE;
} else {
debt -= diff;
}
}
}
#endif

// CPU is not suspended. See if GPUs are
//
if (!coprocs.none()) {
Expand Down Expand Up @@ -686,10 +666,10 @@ void CLIENT_STATE::read_global_prefs(
#endif
// max_cpus, bandwidth limits may have changed
//
set_ncpus();
if (ncpus != host_info.p_ncpus) {
set_n_usable_cpus();
if (n_usable_cpus != host_info.p_ncpus) {
msg_printf(NULL, MSG_INFO,
" max CPUs used: %d", ncpus
" max CPUs used: %d", n_usable_cpus
davidpanderson marked this conversation as resolved.
Show resolved Hide resolved
);
}
if (!global_prefs.run_if_user_active) {
Expand All @@ -709,6 +689,12 @@ void CLIENT_STATE::read_global_prefs(
global_prefs.suspend_cpu_usage
);
}
if (global_prefs.niu_suspend_cpu_usage > 0) {
msg_printf(NULL, MSG_INFO,
" when idle, suspend work if non-BOINC CPU load exceeds %.0f%%",
davidpanderson marked this conversation as resolved.
Show resolved Hide resolved
global_prefs.niu_suspend_cpu_usage
);
}
if (global_prefs.max_bytes_sec_down) {
msg_printf(NULL, MSG_INFO,
" max download rate: %.0f bytes/sec",
Expand Down
2 changes: 1 addition & 1 deletion client/cs_scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ int CLIENT_STATE::make_scheduler_request(PROJECT* p) {
// update hardware info, and write host info
//
host_info.get_host_info(false);
set_ncpus();
set_n_usable_cpus();
host_info.write(mf, !cc_config.suppress_net_info, false);

// get and write disk usage
Expand Down
Loading