Skip to content

Commit

Permalink
Thread balance (neuronsimulator#2351)
Browse files Browse the repository at this point in the history
* SectionList = ParallelContext.get_partition(i) are the root sections of ith thread.

* tweak and test ParallelContext.thread_ctime()

* Update src/nrnoc/multicore.cpp

* Update docs/python/modelspec/programmatic/network/parcon.rst

---------

Co-authored-by: Nicolas Cornu <[email protected]>
  • Loading branch information
nrnhines and alkino committed Jun 16, 2023
1 parent a9685ec commit 6889284
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 14 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ if(NRN_ENABLE_DOCS)
sphinx
COMMAND ${NRN_DOCS_COMMAND_PREFIX} ${SPHINX_EXECUTABLE} -j auto -b html
"${PROJECT_SOURCE_DIR}/docs" "${PROJECT_SOURCE_DIR}/docs/_build"
COMMAND echo "Copy/Paste to Browser ${PROJECT_SOURCE_DIR}/docs/_build/index.html"
COMMAND echo "Copy/Paste to Browser file://${PROJECT_SOURCE_DIR}/docs/_build/index.html"
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/docs
COMMENT "Generating documentation with Sphinx")

Expand Down
17 changes: 16 additions & 1 deletion docs/python/modelspec/programmatic/network/parcon.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3206,6 +3206,21 @@ Parallel Transfer
----


.. method:: ParallelContext.get_partition


Syntax:
``seclist = pc.get_partition(i)``


Description:
Returns a new :func:`SectionList` with references to all the root sections
of the ith thread. The thread index ``i`` must be in the range 0 to
``pc.nthread() - 1``.


----



.. method:: ParallelContext.thread_stat

Expand Down Expand Up @@ -3290,7 +3305,7 @@ Parallel Transfer
The high resolution walltime time in seconds the indicated thread
used during time step integration. Note that this does not include
reduced tree computation time used by thread 0 when :func:`multisplit` is
active.
active. When called with no argument, sets the thread_ctime of all threads to 0.


----
Expand Down
23 changes: 11 additions & 12 deletions src/nrnoc/fadvance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,8 @@ extern double hoc_epsilon;
#define NONVINT_ODE_COUNT 5

#if NRNCTIME
#define CTBEGIN double wt = nrnmpi_wtime();
#define CTADD nth->_ctime += nrnmpi_wtime() - wt;
#define CTBEGIN double wt = nrnmpi_wtime()
#define CTADD nth->_ctime += nrnmpi_wtime() - wt
#else
#define CTBEGIN /**/
#define CTADD /**/
Expand Down Expand Up @@ -457,13 +457,12 @@ static void nrn_fixed_step_group_thread(neuron::model_sorted_token const& cache_

static void nrn_fixed_step_thread(neuron::model_sorted_token const& cache_token, NrnThread& nt) {
auto* const nth = &nt;
double wt;
{
nrn::Instrumentor::phase p("deliver-events");
deliver_net_events(nth);
}

wt = nrnmpi_wtime();
CTBEGIN;
nrn_random_play();
#if ELIMINATE_T_ROUNDOFF
nt.nrn_ndt_ += .5;
Expand All @@ -485,7 +484,7 @@ static void nrn_fixed_step_thread(neuron::model_sorted_token const& cache_token,
nrn::Instrumentor::phase p("update");
nrn_update_voltage(cache_token, *nth);
}
CTADD
CTADD;
/*
To simplify the logic,
if there is no nrnthread_v_transfer then there cannot be an nrnmpi_v_transfer.
Expand All @@ -499,7 +498,7 @@ extern void nrn_extra_scatter_gather(int direction, int tid);

void nrn_fixed_step_lastpart(neuron::model_sorted_token const& cache_token, NrnThread& nt) {
auto* const nth = &nt;
CTBEGIN
CTBEGIN;
#if ELIMINATE_T_ROUNDOFF
nth->nrn_ndt_ += .5;
nth->_t = nrn_tbase_ + nth->nrn_ndt_ * nrn_dt_;
Expand All @@ -511,7 +510,8 @@ void nrn_fixed_step_lastpart(neuron::model_sorted_token const& cache_token, NrnT
nonvint(cache_token, nt);
nrn_ba(cache_token, nt, AFTER_SOLVE);
fixed_record_continuous(cache_token, nt);
CTADD {
CTADD;
{
nrn::Instrumentor::phase p("deliver-events");
nrn_deliver_events(nth); /* up to but not past texit */
}
Expand All @@ -520,9 +520,8 @@ void nrn_fixed_step_lastpart(neuron::model_sorted_token const& cache_token, NrnT
/* nrn_fixed_step_thread is split into three pieces */

void* nrn_ms_treeset_through_triang(NrnThread* nth) {
double wt;
deliver_net_events(nth);
wt = nrnmpi_wtime();
CTBEGIN;
nrn_random_play();
#if ELIMINATE_T_ROUNDOFF
nth->nrn_ndt_ += .5;
Expand All @@ -533,20 +532,20 @@ void* nrn_ms_treeset_through_triang(NrnThread* nth) {
fixed_play_continuous(nth);
setup_tree_matrix(nrn_ensure_model_data_are_sorted(), *nth);
nrn_multisplit_triang(nth);
CTADD
CTADD;
return nullptr;
}
/* Thin wrapper: runs nrn_multisplit_reduce_solve() for thread nth.
 * Unlike the neighbouring multisplit pieces it does no CTBEGIN/CTADD timing.
 * Always returns nullptr. */
void* nrn_ms_reduce_solve(NrnThread* nth) {
nrn_multisplit_reduce_solve(nth);
return nullptr;
}
void* nrn_ms_bksub(NrnThread* nth) {
CTBEGIN
CTBEGIN;
nrn_multisplit_bksub(nth);
second_order_cur(nth);
auto const cache_token = nrn_ensure_model_data_are_sorted();
nrn_update_voltage(cache_token, *nth);
CTADD
CTADD;
/* see above comment in nrn_fixed_step_thread */
if (!nrnthread_v_transfer_) {
nrn_fixed_step_lastpart(cache_token, *nth);
Expand Down
19 changes: 19 additions & 0 deletions src/nrnoc/multicore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -917,6 +917,25 @@ void nrn_thread_partition(int it, Object* sl) {
v_structure_change = 1;
}

/**
 * Build a new SectionList containing the root sections of thread `it`.
 *
 * @param it  thread index; asserted to satisfy 0 <= it < nrn_nthread.
 * @return    the Object** produced by hoc_temp_objvar() for the new
 *            SectionList (NOTE(review): presumably a hoc temporary whose
 *            lifetime is managed by the interpreter - confirm).
 *
 * If the thread has no roots list yet, v_setup_vectors() is called first
 * (presumably that is what populates the per-thread roots - confirm).
 */
Object** nrn_get_thread_partition(int it) {
    assert(it >= 0 && it < nrn_nthread);
    NrnThread& thread = nrn_threads[it];
    if (!thread.roots) {
        v_setup_vectors();
    }
    // thread.roots is a hoc_List of Section*. Each of those Section* is
    // ref'ed and appended to a freshly created list that backs the returned
    // SectionList object.
    hoc_List* const seclist = hoc_l_newlist();
    Object** const result = hoc_temp_objvar(hoc_lookup("SectionList"), seclist);
    hoc_Item* item;
    ITERATE(item, thread.roots) {
        Section* const root = hocSEC(item);
        section_ref(root);
        hoc_l_lappendsec(seclist, root);
    }
    return result;
}

int nrn_user_partition() {
int i, it, b, n;
hoc_Item* qsec;
Expand Down
7 changes: 7 additions & 0 deletions src/parallel/ocbbs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ static void nrnmpi_dbl_broadcast(double*, int, int) {}
extern double* nrn_mech_wtime_;
extern int nrn_nthread;
extern void nrn_thread_partition(int, Object*);
extern Object** nrn_get_thread_partition(int);
extern int nrn_allow_busywait(int);
extern int nrn_how_many_processors();
extern size_t nrncore_write();
Expand Down Expand Up @@ -935,6 +936,11 @@ static double partition(void*) {
return 0.0;
}

// Implementation of ParallelContext.get_partition(i): returns a new
// SectionList holding the root sections of thread i.
// The first argument is passed through chkarg(1, 0, nrn_nthread - 1)
// (presumably rejecting indices outside [0, nrn_nthread - 1] - confirm)
// before being truncated to int and handed to nrn_get_thread_partition().
static Object** get_partition(void*) {
    return nrn_get_thread_partition(static_cast<int>(chkarg(1, 0, nrn_nthread - 1)));
}

static double thread_stat(void*) {
// nrn_thread_stat was called here but didn't do anything
return 0.0;
Expand Down Expand Up @@ -1110,6 +1116,7 @@ static Member_ret_obj_func retobj_members[] = {{"upkvec", upkvec},
{"gid2obj", gid2obj},
{"gid2cell", gid2cell},
{"gid_connect", gid_connect},
{"get_partition", get_partition},
{"upkpyobj", upkpyobj},
{"pyret", pyret},
{"py_alltoall", py_alltoall},
Expand Down
98 changes: 98 additions & 0 deletions test/hoctests/tests/test_thread_partition.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
from neuron import h
from neuron.expect_hocerr import expect_err, set_quiet

# NOTE(review): presumably keeps messages from expect_hocerr visible - confirm.
set_quiet(False)
# Module-wide ParallelContext shared by all helpers and tests in this file.
pc = h.ParallelContext()


class Cell:
    """Test cell made of three sections connected end to end.

    Section k (k >= 1) is connected to location 1 of section k - 1; each of
    those connected sections gets nseg = 11 and the "hh" mechanism.
    secs[0] is the root section of the cell.
    """

    def __init__(self, id):
        self.id = id
        chain = [h.Section(name="d_" + str(k), cell=self) for k in range(3)]
        self.secs = chain
        # Walk consecutive (parent, child) pairs along the chain.
        for parent, child in zip(chain, chain[1:]):
            child.connect(parent(1))
            child.nseg = 11
            child.insert("hh")

    def __str__(self):
        return "Cell_" + str(self.id)


def prroots():
    """Print the header "prroots", then each section that
    SectionList.allroots() puts into a fresh SectionList."""
    print("prroots")
    all_roots = h.SectionList()
    all_roots.allroots()
    for root in all_roots:
        print(root)


def prpart():
    """For each thread, print thread index, section and owning cell id for
    every section returned by pc.get_partition."""
    nthread = pc.nthread()
    for tid in range(nthread):
        for root in pc.get_partition(tid):
            print(tid, root, root.cell().id)


def assertpart(parts="default"):
    """Assert that pc.get_partition agrees with the expected partitioning.

    parts: either the string "default", or a sequence holding one iterable
        of sections per thread (e.g. the SectionLists given to pc.partition).

    With "default", the sections of thread 0, 1, ... taken in order must
    match, one by one, the sections listed by SectionList.allroots().
    Otherwise there must be exactly pc.nthread() entries in parts, and the
    partition of each thread must equal the corresponding entry.

    Raises AssertionError on the first mismatch.
    """
    if str(parts) == "default":  # not round-robin but root order
        roots = h.SectionList()
        roots.allroots()
        roots = list(roots)
        i = 0  # running index into roots across all threads
        for ith in range(pc.nthread()):
            for sec in pc.get_partition(ith):
                assert sec == roots[i]
                i += 1
    else:  # equal to the parts
        assert len(parts) == pc.nthread()
        for ith in range(pc.nthread()):
            # Fetch the partition once; the original also assigned it to an
            # unused local before fetching it again.
            a = list(pc.get_partition(ith))
            b = list(parts[ith])
            assert a == b


def test_default():
    """Check the default partition: before any cell exists, after creating
    five cells, and after switching to 3 and then 2 threads."""
    assertpart()  # parts defaults to "default"

    # Keep a reference so the cells' sections stay alive during the checks.
    cells = [Cell(i) for i in range(5)]
    assertpart()

    for nthread in (3, 2):
        pc.nthread(nthread)
        assertpart()


def test_parts():
    """Assign the root sections of ten cells to three threads at random,
    check that pc.get_partition reports that assignment, then run a short
    simulation and print the cell count and thread_ctime of each thread."""
    cells = [Cell(i) for i in range(10)]
    rng = h.Random()
    rng.Random123(1, 0, 0)
    nt = 3
    pc.nthread(nt)
    rng.discunif(0, nt - 1)  # picks are thread indices 0 .. nt-1

    parts = []
    for _ in range(nt):
        parts.append(h.SectionList())
    for cell in cells:
        target = int(rng.repick())
        parts[target].append(cell.secs[0])
    for ith, seclist in enumerate(parts):
        pc.partition(ith, seclist)
    assertpart(parts)

    def run(tstop):
        pc.thread_ctime()  # no argument: set thread_ctime of all threads to 0
        pc.set_maxstep(10)
        h.finitialize(-65)
        pc.psolve(tstop)

    run(20)
    print("ith ncell thread_ctime")
    for ith in range(pc.nthread()):
        ncell = sum(1 for _ in parts[ith])
        print(ith, ncell, pc.thread_ctime(ith))


# When executed as a script, run the tests in definition order.
if __name__ == "__main__":
    for test in (test_default, test_parts):
        test()

0 comments on commit 6889284

Please sign in to comment.