diff --git a/.clang-format b/.clang-format index 33bf2a3b..d589eab7 100644 --- a/.clang-format +++ b/.clang-format @@ -12,12 +12,12 @@ AlignTrailingComments: true AllowAllArgumentsOnNextLine: true AllowAllConstructorInitializersOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: Never -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: All -AllowShortLambdasOnASingleLine: All -AllowShortIfStatementsOnASingleLine: Never -AllowShortLoopsOnASingleLine: false +AllowShortBlocksOnASingleLine: true +AllowShortCaseLabelsOnASingleLine: true +AllowShortFunctionsOnASingleLine: true +AllowShortLambdasOnASingleLine: true +AllowShortIfStatementsOnASingleLine: true +AllowShortLoopsOnASingleLine: true AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false diff --git a/Makefile b/Makefile index 60a845f8..05d4c77b 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC ?= mpicc -CFLAGS ?= -Wall -Wextra -Wpedantic -Wno-unused-function -Wno-unused-parameter -std=c99 +CFLAGS ?= -Wall -Wextra -Wpedantic -Wno-unused-function -Wno-unused-parameter -std=c99 -g LDFLAGS ?= DEBUG ?= 0 MPI ?= 1 diff --git a/examples/gencon.c b/examples/gencon.c index f989622b..51fadf69 100644 --- a/examples/gencon.c +++ b/examples/gencon.c @@ -21,8 +21,7 @@ static void test_parcon(unsigned int neltp, long long *vlp, char *name, struct gs_data *gsh = gs_setup(vls, size, &c, 0, gs_pairwise, 0); uint i; - for (i = 0; i < size; i++) - minp[i] = maxp[i] = vlp[i]; + for (i = 0; i < size; i++) minp[i] = maxp[i] = vlp[i]; buffer bfr; buffer_init(&bfr, 1024); @@ -40,8 +39,7 @@ static void test_parcon(unsigned int neltp, long long *vlp, char *name, gsh = gs_setup(vlp, size, &c, 0, gs_pairwise, 0); - for (i = 0; i < size; i++) - minp[i] = maxp[i] = vls[i]; + for (i = 0; i < size; i++) minp[i] = maxp[i] = vls[i]; gs(minp, gs_long, gs_min, 0, gsh, &bfr); gs(maxp, gs_long, gs_max, 0, gsh, &bfr); @@ -98,8 +96,7 @@ int main(int argc, char *argv[]) { } // Turns on testing if test is on - if (in->test) - test_parcon(nelt, vl, in->mesh, world); + if (in->test) test_parcon(nelt, vl, in->mesh, world); // Free resources free(vl), free(coord), free(bcs); diff --git a/examples/genmap.c b/examples/genmap.c index 90941149..1e753cb4 100644 --- a/examples/genmap.c +++ b/examples/genmap.c @@ -13,8 +13,7 @@ int main(int argc, char *argv[]) { int rank, size; MPI_Comm_rank(world, &rank); MPI_Comm_size(world, &size); - if (in->nactive > size) - in->nactive = size; + if (in->nactive > size) in->nactive = size; MPI_Comm comm; MPI_Comm_split(world, rank < in->nactive, rank, &comm); diff --git a/src/components.c b/src/components.c index 3efb84fb..4679d381 100644 --- a/src/components.c +++ b/src/components.c @@ -12,19 +12,16 @@ uint get_components(sint *component, struct array *elems, unsigned nv, comm_scan(out, c, gs_long, gs_add, &in, 1, wrk); ulong nelg = out[1][0], start = out[0][0]; - if (nelg == 0) - return 0; + if (nelg == 0) return 0; uint nev = nelt * nv; slong *p = tcalloc(slong, nev); slong *ids = tcalloc(slong, nev); int null_input = (component == NULL); - if (null_input) - component = tcalloc(sint, nelt); + if (null_input) component = tcalloc(sint, nelt); - for (uint e = 0; e < nelt; e++) - component[e] = -1; + for (uint e = 0; e < nelt; e++) component[e] = -1; struct unmarked { uint index; @@ -55,8 +52,7 @@ uint get_components(sint *component, struct array *elems, unsigned nv, if (bin == 1) { // Initialize p for (uint e = 0; e < arr.n; e++) - for (uint d = 0; d < nv; d++) - p[e * nv + d] = 0; + for (uint d = 0; d < nv; d++) p[e * nv + d] = 0; // Mark the first non-marked element as seed struct unmarked *ptr = (struct unmarked *)arr.ptr; @@ -65,8 +61,7 @@ uint get_components(sint *component, struct array *elems, unsigned nv, comm_allreduce(&cc, gs_long, gs_min, &mfirst, 1, wrk); if (mfirst == first) { - for (uint d = 0; d < nv; d++) - p[0 * nv + d] = 1; + for (uint d = 0; d < nv; d++) p[0 * nv + d] = 1; } // Setup gs @@ -91,8 +86,7 @@ uint get_components(sint *component, struct array *elems, unsigned nv, } // There was one non-zero vertex in the element if (d < nv) { - for (d = 0; d < nv; d++) - p[e * nv + d] = 1; + for (d = 0; d < nv; d++) p[e * nv + d] = 1; } } @@ -110,8 +104,7 @@ uint get_components(sint *component, struct array *elems, unsigned nv, array_free(&arr); free(p), free(ids); - if (null_input == 1) - free(component); + if (null_input == 1) free(component); return count; } @@ -148,8 +141,7 @@ static sint find_or_insert(struct array *cids, struct cmp_t *t) { } uint n = mid; - if (t->c > pc[mid].c) - n = mid + 1; + if (t->c > pc[mid].c) n = mid + 1; struct cmp_t t0 = *t, t1; for (; n < cids->n; n++) { @@ -160,8 +152,7 @@ static sint find_or_insert(struct array *cids, struct cmp_t *t) { pc[n] = t0, cids->n++; // Sanity check. - for (unsigned i = 1; i < cids->n; i++) - assert(pc[i - 1].c < pc[i].c); + for (unsigned i = 1; i < cids->n; i++) assert(pc[i - 1].c < pc[i].c); return -1; } @@ -175,8 +166,7 @@ uint get_components_v2(sint *component, struct array *elems, unsigned nv, comm_scan(out, ci, gs_long, gs_add, &in, 1, wrk); ulong nelg = out[1][0]; - if (nelg == 0) - return 0; + if (nelg == 0) return 0; const uint nev = nelt * nv; sint *p0 = tcalloc(sint, nev); @@ -185,11 +175,9 @@ uint get_components_v2(sint *component, struct array *elems, unsigned nv, uint *inds = tcalloc(uint, nev); int null_input = (component == NULL); - if (null_input) - component = tcalloc(sint, nelt); + if (null_input) component = tcalloc(sint, nelt); - for (uint e = 0; e < nelt; e++) - component[e] = -1; + for (uint e = 0; e < nelt; e++) component[e] = -1; struct comm c; ulong nmkd = 0; @@ -200,8 +188,7 @@ uint get_components_v2(sint *component, struct array *elems, unsigned nv, for (uint e = 0; e < nelt; e++) { if (component[e] == -1) { inds[unmkd] = e; - for (uint v = 0; v < nv; v++) - ids[unmkd * nv + v] = pe[e].vertices[v]; + for (uint v = 0; v < nv; v++) ids[unmkd * nv + v] = pe[e].vertices[v]; unmkd++; } } @@ -212,13 +199,11 @@ uint get_components_v2(sint *component, struct array *elems, unsigned nv, slong nnzg = 0, ncg = 0; if (bin == 1) { // Mark the first unmarked element as seed for the component c.id. - for (uint v = 0; v < nv; v++) - p[0 * nv + v] = c.id; + for (uint v = 0; v < nv; v++) p[0 * nv + v] = c.id; // Initialize the rest of p. for (uint e = 1; e < unmkd; e++) - for (uint v = 0; v < nv; v++) - p[e * nv + v] = -1; + for (uint v = 0; v < nv; v++) p[e * nv + v] = -1; // Setup gather-scatter to do BFS. struct gs_data *gsh = gs_setup(ids, unmkd * nv, &c, 0, gs_pairwise, 0); @@ -226,8 +211,7 @@ uint get_components_v2(sint *component, struct array *elems, unsigned nv, // Perform BFS. sint changed; do { - for (uint i = 0; i < unmkd * nv; i++) - p0[i] = p[i]; + for (uint i = 0; i < unmkd * nv; i++) p0[i] = p[i]; gs(p, gs_int, gs_max, 0, gsh, bfr); @@ -248,8 +232,7 @@ uint get_components_v2(sint *component, struct array *elems, unsigned nv, // the element with that value. if (v0 > -1) { sint c = p[e * nv + v0]; - for (uint v = 0; v < nv; v++) - p[e * nv + v] = c; + for (uint v = 0; v < nv; v++) p[e * nv + v] = c; nnz++; } @@ -296,8 +279,7 @@ uint get_components_v2(sint *component, struct array *elems, unsigned nv, cnt++; struct cmp_t *pc = (struct cmp_t *)cids.ptr; for (uint i = 1; i < cids.n; i++) { - if (pc[i].c > pc[i - 1].c) - cnt++; + if (pc[i].c > pc[i - 1].c) cnt++; } } @@ -310,8 +292,7 @@ uint get_components_v2(sint *component, struct array *elems, unsigned nv, struct cmp_t *pc = (struct cmp_t *)cids.ptr; pc[0].uid = s; for (uint i = 1; i < cids.n; i++) { - if (pc[i].c > pc[i - 1].c) - s++; + if (pc[i].c > pc[i - 1].c) s++; pc[i].uid = s; } } @@ -340,8 +321,7 @@ uint get_components_v2(sint *component, struct array *elems, unsigned nv, nc += ncg; } while (nmkd < nelg); - if (null_input == 1) - free(component); + if (null_input == 1) free(component); free(p0), free(p), free(ids), free(inds); return nc; diff --git a/src/con-check.c b/src/con-check.c index dfe9e86a..c03541e3 100644 --- a/src/con-check.c +++ b/src/con-check.c @@ -127,8 +127,7 @@ static VToEMap *getVToEMap(Mesh m, struct comm *c, buffer *bfr) { crystal_free(&cr); // create the map - if (a.n == 0) - return NULL; + if (a.n == 0) return NULL; VToEMap *map = calloc(1, sizeof(VToEMap)); map->elements = calloc(a.n, sizeof(ulong)); @@ -136,8 +135,7 @@ static VToEMap *getVToEMap(Mesh m, struct comm *c, buffer *bfr) { uint nGIds = 1, prev = 0; vertex *aPtr = (vertex *)a.ptr; for (i = 1; i < a.n; i++) { - if (aPtr[i].vertexId != aPtr[prev].vertexId) - nGIds++; + if (aPtr[i].vertexId != aPtr[prev].vertexId) nGIds++; prev = i; } @@ -185,8 +183,7 @@ static uint getPosition(VToEMap *map, ulong key) { begin = mid; }; - if (globalIds[mid] != key) - return UINT_MAX; + if (globalIds[mid] != key) return UINT_MAX; return mid; } @@ -275,8 +272,7 @@ int element_check(Mesh mesh, struct comm *c, buffer *bfr) { uint i, j; int err = 0; for (i = 0; i < nelt && err == 0; i++) { - for (j = 0; j < nv; j++) - globalIds[j].id = ptr[i * nv + j].globalId + 1; + for (j = 0; j < nv; j++) globalIds[j].id = ptr[i * nv + j].globalId + 1; sarray_sort(LongID, globalIds, nv, id, 1, bfr); diff --git a/src/con-periodic.c b/src/con-periodic.c index b0c61dc8..f0b5d78a 100644 --- a/src/con-periodic.c +++ b/src/con-periodic.c @@ -49,8 +49,7 @@ static int compressPeriodicVertices(Mesh mesh, struct comm *c, buffer *bfr) { comm_scan(out, c, gs_long, gs_add, in, 1, buf); slong start = out[0][0]; - for (i = 0; i < npoints; i++) - points[i].globalId += start; + for (i = 0; i < npoints; i++) points[i].globalId += start; return 0; } @@ -58,8 +57,7 @@ static int compressPeriodicVertices(Mesh mesh, struct comm *c, buffer *bfr) { static ulong findMinBelowI(ulong min, uint I, struct array *arr) { struct mpair_t *ptr = (struct mpair_t *)arr->ptr; for (uint i = 0; i < I; i++) - if (ptr[i].orig == min) - return ptr[i].min; + if (ptr[i].orig == min) return ptr[i].min; return min; } @@ -71,43 +69,34 @@ static int renumberPeriodicVertices(Mesh mesh, struct comm *c, *mcur = tcalloc(slong, size1); struct point_t *pe = (struct point_t *)mesh->elements.ptr; - for (uint i = 0; i < size1; i++) - mids[i] = pe[i].globalId; + for (uint i = 0; i < size1; i++) mids[i] = pe[i].globalId; struct mpair_t *pm = (struct mpair_t *)matched->ptr; - for (uint i = 0; i < size2; i++) - mids[size1 + i] = pm[i].orig; + for (uint i = 0; i < size2; i++) mids[size1 + i] = pm[i].orig; struct gs_data *gsh = gs_setup(mids, size1 + size2, c, 0, gs_pairwise, 0); - for (uint i = 0; i < size1; i++) - mnew[i] = pe[i].globalId; - for (uint i = 0; i < size2; i++) - mnew[size1 + i] = pm[i].min; + for (uint i = 0; i < size1; i++) mnew[i] = pe[i].globalId; + for (uint i = 0; i < size2; i++) mnew[size1 + i] = pm[i].min; gs(mnew, gs_long, gs_min, 0, gsh, bfr); sint changed, wrk; do { - for (uint i = 0; i < size1; i++) - mcur[i] = mnew[i]; - for (uint i = 0; i < size2; i++) - mids[size1 + size2 + i] = -mnew[size1 + i]; + for (uint i = 0; i < size1; i++) mcur[i] = mnew[i]; + for (uint i = 0; i < size2; i++) mids[size1 + size2 + i] = -mnew[size1 + i]; struct gs_data *gsh1 = gs_setup(mids, size1 + 2 * size2, c, 0, gs_pairwise, 0); gs(mnew, gs_long, gs_min, 0, gsh1, bfr); gs_free(gsh1); - for (uint i = 0; i < size2; i++) - mnew[size1 + i] = mnew[size1 + size2 + i]; + for (uint i = 0; i < size2; i++) mnew[size1 + i] = mnew[size1 + size2 + i]; gs(mnew, gs_long, gs_min, 0, gsh, bfr); changed = 0; - for (uint i = 0; i < size1; i++) - changed += (mnew[i] != mcur[i]); + for (uint i = 0; i < size1; i++) changed += (mnew[i] != mcur[i]); comm_allreduce(c, gs_int, gs_max, &changed, 1, &wrk); } while (changed); - for (uint i = 0; i < size1; i++) - pe[i].globalId = mcur[i]; + for (uint i = 0; i < size1; i++) pe[i].globalId = mcur[i]; gs_free(gsh); free(mids), free(mnew), free(mcur); @@ -229,13 +218,11 @@ static int gatherMatchingPeriodicFaces(Mesh mesh, struct comm *c) { static int setPeriodicFaceCoordinates(Mesh mesh, struct comm *c, buffer *buf) { BoundaryFace bPtr = mesh->boundary.ptr; sint bSize = mesh->boundary.n; - if (bSize == 0) - return 0; + if (bSize == 0) return 0; Point ePtr = mesh->elements.ptr; sint eSize = mesh->elements.n; - if (eSize == 0) - return 0; + if (eSize == 0) return 0; /* Need boundary array to be sorted by elementId */ sarray_sort(struct boundary_t, bPtr, bSize, elementId, 1, buf); @@ -252,8 +239,7 @@ static int setPeriodicFaceCoordinates(Mesh mesh, struct comm *c, buffer *buf) { sint i = 0, k = 0; int nv = mesh->nv, nvf = mesh->nv / 2, j; while (i < bSize) { - while (k < eSize && ePtr[k].elementId < bPtr[i].elementId) - k += nv; + while (k < eSize && ePtr[k].elementId < bPtr[i].elementId) k += nv; // copy vertices to boundary face if (k < eSize && ePtr[k].elementId == bPtr[i].elementId) { int faceId = bPtr[i].faceId; diff --git a/src/con-unique-vertices.c b/src/con-unique-vertices.c index b4564f8a..207ac19c 100644 --- a/src/con-unique-vertices.c +++ b/src/con-unique-vertices.c @@ -17,8 +17,7 @@ static void tuple_sort_(void *ra, uint n, uint usize, uint offset) { #define get(ra_, l_) (*((double *)((char *)ra_ + (l_ - 1) * usize + offset))) - if (n < 2) - return; + if (n < 2) return; l = n / 2 + 1; ir = n; @@ -38,8 +37,7 @@ static void tuple_sort_(void *ra, uint n, uint usize, uint offset) { i = l; j = l + l; while (j <= ir) { - if (j < ir && get(ra, j) < get(ra, j + 1)) - j++; + if (j < ir && get(ra, j) < get(ra, j + 1)) j++; assert(j >= 1 && j <= n && "j2"); assert(i >= 1 && i <= n && "i"); if (get(rra, 1) < get(ra, j)) { @@ -64,8 +62,7 @@ static void tuple_sort_(void *ra, uint n, uint usize, uint offset) { static void sort_segments_local(struct array *local, int dim) { uint npts = local->n; - if (npts == 0) - return; + if (npts == 0) return; struct point_t *const pts = (struct point_t *const)local->ptr; uint s = 0, e; @@ -75,26 +72,17 @@ static void sort_segments_local(struct array *local, int dim) { if (s < npts - 1 && e - s > 1) { switch (dim) { - case 0: - tuple_sort(struct point_t, &pts[s], e - s, x[0]); - break; - case 1: - tuple_sort(struct point_t, &pts[s], e - s, x[1]); - break; - case 2: - tuple_sort(struct point_t, &pts[s], e - s, x[2]); - break; - default: - break; + case 0: tuple_sort(struct point_t, &pts[s], e - s, x[0]); break; + case 1: tuple_sort(struct point_t, &pts[s], e - s, x[1]); break; + case 2: tuple_sort(struct point_t, &pts[s], e - s, x[2]); break; + default: break; } } uint sum = 0; - for (uint i = s; i < e; i++) - sum += pts[i].ifSegment, pts[i].ifSegment = 0; + for (uint i = s; i < e; i++) sum += pts[i].ifSegment, pts[i].ifSegment = 0; - if (sum > 0) - pts[s].ifSegment = 1; + if (sum > 0) pts[s].ifSegment = 1; s = e; } @@ -113,23 +101,20 @@ static void sort_segments_shared_aux(struct array *arr, int dim, struct comm *c, case 2: parallel_sort(struct point_t, arr, x[2], gs_double, 0, 1, c, bfr); break; - default: - break; + default: break; } parrsb_print(c, verbose, "\t\t\t\tsss_aux_parallel_sort: done.\n"); // Mark the first point of the segment to have ifSegment = 1 and zero out // everything else. struct point_t *const pts = (struct point_t *const)arr->ptr; - for (uint i = 0; i < arr->n; i++) - pts[i].ifSegment = 0; + for (uint i = 0; i < arr->n; i++) pts[i].ifSegment = 0; sint wrk; sint rank = (arr->n > 0) ? c->id : c->np; comm_allreduce(c, gs_int, gs_min, &rank, 1, &wrk); - if ((sint)c->id == rank) - pts[0].ifSegment = 1; + if ((sint)c->id == rank) pts[0].ifSegment = 1; parrsb_print(c, verbose, "\t\t\t\tsss_aux_mark_first_point: done."); } @@ -176,8 +161,7 @@ static uint find_bin_cr(const slong id, const struct comm *c, const int verbose, uint s = 0; while (s < arr.n) { uint e = s + 1; - for (; e < arr.n && pa[s].id == pa[e].id; e++) - pa[e].procm = pa[s].procm; + for (; e < arr.n && pa[s].id == pa[e].id; e++) pa[e].procm = pa[s].procm; s = e; } } @@ -215,8 +199,7 @@ static void sort_segments_shared(struct array *shared, int dim, struct comm *c, sum += pts[0].ifSegment; array_cat(struct point_t, &segments[ngids - 1], &pts[0], 1); for (uint i = 1; i < shared->n; i++) { - if (pts[i].ifSegment > 0) - gids[1] = pts[i].globalId, ngids++; + if (pts[i].ifSegment > 0) gids[1] = pts[i].globalId, ngids++; sum += pts[i].ifSegment; array_cat(struct point_t, &segments[ngids - 1], &pts[i], 1); } @@ -230,8 +213,7 @@ static void sort_segments_shared(struct array *shared, int dim, struct comm *c, // algo = 2 is a custom crystal router implementation. int algo = 0; char *val = getenv("PARRSB_FIND_BIN_ALGO"); - if (val) - algo = atoi(val); + if (val) algo = atoi(val); assert(algo >= 0 && algo <= 2); // We sort the shared segments in two phases. All the segments having an even @@ -285,8 +267,7 @@ static int talk_to_neighbor(struct point_t *pnt, const struct array *arr, int dir, const struct comm *c) { assert(dir == -1 || dir == 1); - if (c->np <= 1) - return 0; + if (c->np <= 1) return 0; struct comm active; comm_split(c, arr->n > 0, c->id, &active); @@ -312,8 +293,7 @@ static int talk_to_neighbor(struct point_t *pnt, const struct array *arr, sarray_transfer(struct point_t, &tmp, proc, 1, &cr); crystal_free(&cr); - if (tmp.n == 0) - return 0; + if (tmp.n == 0) return 0; pts = (struct point_t *)tmp.ptr, pnt[0] = pts[0]; array_free(&tmp), comm_free(&active); @@ -327,8 +307,7 @@ static void find_segments(struct array *arr, int i, scalar tol2, scalar d = diff_sqr(pts[j].x[i], pts[j - 1].x[i]); scalar dx = MIN(pts[j].dx, pts[j - 1].dx) * tol2; - if (d > dx) - pts[j].ifSegment = 1; + if (d > dx) pts[j].ifSegment = 1; } struct point_t pnt; @@ -336,8 +315,7 @@ static void find_segments(struct array *arr, int i, scalar tol2, if (npts > 0) { // npts > 0 --> arr->n > 0 scalar d = diff_sqr(pnt.x[i], pts[0].x[i]); scalar dx = MIN(pnt.dx, pts[0].dx) * tol2; - if (d > dx) - pts[0].ifSegment = 1; + if (d > dx) pts[0].ifSegment = 1; } } @@ -345,8 +323,7 @@ static inline void remove_marked(struct array *arr) { struct point_t *pts = (struct point_t *)arr->ptr; uint count = 0; for (uint i = 0; i < arr->n; i++) { - if (pts[i].ifSegment != -1) - pts[count] = pts[i], count++; + if (pts[i].ifSegment != -1) pts[count] = pts[i], count++; } arr->n = count; } @@ -404,8 +381,7 @@ static slong number_segments(struct array *local, struct array *shared, struct point_t *pts = (struct point_t *)local->ptr; uint lcnt = 0; for (uint i = 0; i < local->n; i++) { - if (pts[i].ifSegment) - lcnt++; + if (pts[i].ifSegment) lcnt++; } slong out[2][1], wrk[2][1], in = lcnt; @@ -414,8 +390,7 @@ static slong number_segments(struct array *local, struct array *shared, ls--; for (uint i = 0; i < local->n; i++) { - if (pts[i].ifSegment) - ls++; + if (pts[i].ifSegment) ls++; assert(ls >= 0); pts[i].globalId = ls; } @@ -423,8 +398,7 @@ static slong number_segments(struct array *local, struct array *shared, uint scnt = 0; pts = (struct point_t *)shared->ptr; for (uint i = 0; i < shared->n; i++) { - if (pts[i].ifSegment) - scnt++; + if (pts[i].ifSegment) scnt++; } in = scnt; @@ -433,8 +407,7 @@ static slong number_segments(struct array *local, struct array *shared, ss = lt + ss, ss--; for (uint i = 0; i < shared->n; i++) { - if (pts[i].ifSegment) - ss++; + if (pts[i].ifSegment) ss++; assert(ss >= lt); pts[i].globalId = ss; } @@ -451,16 +424,14 @@ static void number_points(struct array *elems, const struct array *local, slong s = out[0][0], nl = out[1][0]; struct point_t *pts = (struct point_t *)local->ptr; - for (uint i = 0; i < local->n; i++) - pts[i].pntid = s + i; + for (uint i = 0; i < local->n; i++) pts[i].pntid = s + i; in = shared->n; comm_scan(out, c, gs_long, gs_add, &in, 1, wrk); s = out[0][0] + nl; pts = (struct point_t *)shared->ptr; - for (uint i = 0; i < shared->n; i++) - pts[i].pntid = s + i; + for (uint i = 0; i < shared->n; i++) pts[i].pntid = s + i; // Copy everything back to elements array. elems->n = 0; @@ -483,8 +454,7 @@ int find_unique_vertices(Mesh mesh, struct comm *c, scalar tol, int verbose, // points are a single segment. struct array *elems = &mesh->elements; struct point_t *pts = (struct point_t *)elems->ptr; - for (uint i = 0; i < elems->n; i++) - pts[i].ifSegment = pts[i].globalId = 0; + for (uint i = 0; i < elems->n; i++) pts[i].ifSegment = pts[i].globalId = 0; slong npts = elems->n, wrk; comm_allreduce(c, gs_long, gs_add, &npts, 1, &wrk); diff --git a/src/con.c b/src/con.c index c34a918b..92932692 100644 --- a/src/con.c +++ b/src/con.c @@ -70,8 +70,7 @@ int find_min_neighbor_distance(Mesh mesh) { uint ndim = mesh->ndim; uint nv = mesh->nv; - if (ndim < 2 || ndim > 3) - return 1; + if (ndim < 2 || ndim > 3) return 1; uint i, j, k, neighbor; if (ndim == 3) { @@ -117,8 +116,7 @@ static int set_global_id(Mesh mesh, struct comm *c) { if (bin == 1) { slong count = 0; for (uint i = 0; i < nPoints; i++) - if (points[i].ifSegment) - count++; + if (points[i].ifSegment) count++; slong in = count, out[2][1], buf[2][1]; comm_scan(out, &nonZeroRanks, gs_long, gs_add, &in, 1, buf); @@ -126,8 +124,7 @@ static int set_global_id(Mesh mesh, struct comm *c) { count = -1; for (uint i = 0; i < nPoints; i++) { - if (points[i].ifSegment) - count++; + if (points[i].ifSegment) count++; assert(start + count >= 0); points[i].globalId = start + count; } @@ -200,8 +197,7 @@ int parrsb_conn_mesh(long long *vtx, double *coord, uint nelt, unsigned ndim, int verbose = 1; { const char *val = getenv("PARRSB_VERBOSE_LEVEL"); - if (val != NULL) - verbose = atoi(val); + if (val != NULL) verbose = atoi(val); } parrsb_print(&c, verbose, "Running parCon ..."); @@ -279,8 +275,7 @@ int parrsb_conn_mesh(long long *vtx, double *coord, uint nelt, unsigned ndim, // Report timing info and finish { double gmin[8], gmax[8], buf[8]; - for (unsigned i = 0; i < 8; i++) - gmax[i] = gmin[i] = duration[i]; + for (unsigned i = 0; i < 8; i++) gmax[i] = gmin[i] = duration[i]; comm_allreduce(&c, gs_double, gs_min, gmin, 8, buf); comm_allreduce(&c, gs_double, gs_max, gmax, 8, buf); diff --git a/src/eigen.c b/src/eigen.c index 7c86a775..107d6c3b 100644 --- a/src/eigen.c +++ b/src/eigen.c @@ -26,12 +26,10 @@ void matrix_inverse(int N, double *A) { double *work = (double *)calloc(N * N, sizeof(double)); FDGETRF(&N, &N, A, &N, ipiv, &info); - if (info != 0) - printf("dgetrf: %d\n", info); + if (info != 0) printf("dgetrf: %d\n", info); FDGETRI(&N, A, &N, ipiv, work, &size, &info); - if (info != 0) - printf("dgetri: %d\n", info); + if (info != 0) printf("dgetri: %d\n", info); free(ipiv); free(work); diff --git a/src/fiedler.c b/src/fiedler.c index f98edbe7..313462c0 100644 --- a/src/fiedler.c +++ b/src/fiedler.c @@ -12,8 +12,7 @@ extern void matrix_inverse(int N, double *A); inline static scalar dot(scalar *y, scalar *x, uint n) { scalar result = 0.0; - for (uint i = 0; i < n; i++) - result += x[i] * y[i]; + for (uint i = 0; i < n; i++) result += x[i] * y[i]; return result; } @@ -21,15 +20,13 @@ inline static scalar dot(scalar *y, scalar *x, uint n) { inline static void ortho(scalar *q, uint lelt, ulong n, struct comm *c) { uint i; scalar sum = 0.0; - for (i = 0; i < lelt; i++) - sum += q[i]; + for (i = 0; i < lelt; i++) sum += q[i]; scalar buf; comm_allreduce(c, gs_double, gs_add, &sum, 1, &buf); sum /= n; - for (i = 0; i < lelt; i++) - q[i] -= sum; + for (i = 0; i < lelt; i++) q[i] -= sum; } struct fiedler { @@ -49,8 +46,7 @@ int power_serial(double *y, uint N, double *A, int verbose) { } scalar normi = 1.0 / sqrt(norm); - for (uint i = 0; i < N; i++) - y[i] *= normi; + for (uint i = 0; i < N; i++) y[i] *= normi; double *Ay = tcalloc(double, N); scalar err = 1.0, lambda; @@ -59,22 +55,17 @@ int power_serial(double *y, uint N, double *A, int verbose) { norm = 0.0; for (uint j = 0; j < N; j++) { Ay[j] = 0.0; - for (uint k = 0; k < N; k++) { - Ay[j] += A[j * N + k] * y[k]; - } + for (uint k = 0; k < N; k++) { Ay[j] += A[j * N + k] * y[k]; } norm += Ay[j] * Ay[j]; } - if (i > 0) - err = (sqrt(norm) - lambda) / lambda; + if (i > 0) err = (sqrt(norm) - lambda) / lambda; lambda = sqrt(norm); normi = 1.0 / sqrt(norm); - for (uint j = 0; j < N; j++) - y[j] = Ay[j] * normi; + for (uint j = 0; j < N; j++) y[j] = Ay[j] * normi; - if (fabs(err) < 1e-12) - break; + if (fabs(err) < 1e-12) break; } free(Ay); @@ -84,16 +75,14 @@ int power_serial(double *y, uint N, double *A, int verbose) { int inv_power_serial(double *y, uint N, double *A, int verbose) { double *Ainv = tcalloc(double, N *N); for (uint j = 0; j < N; j++) { - for (uint k = 0; k < N; k++) - Ainv[j * N + k] = A[k * N + j]; + for (uint k = 0; k < N; k++) Ainv[j * N + k] = A[k * N + j]; } matrix_inverse(N, Ainv); uint j; for (j = 0; j < N; j++) { - for (uint k = 0; k < N; k++) - A[j * N + k] = Ainv[k * N + j]; + for (uint k = 0; k < N; k++) A[j * N + k] = Ainv[k * N + j]; } j = power_serial(y, N, Ainv, verbose); @@ -109,8 +98,7 @@ static int project(scalar *x, uint n, scalar *b, struct laplacian *L, comm_scan(out, c, gs_long, gs_add, &in, 1, buf); ulong ng = out[1][0]; - if (ng == 0) - return 0; + if (ng == 0) return 0; scalar *z = (scalar *)tcalloc(scalar, 6 * n); scalar *w = z + n, *r = w + n, *p = r + n, *z0 = p + n, *dz = z0 + n; @@ -118,22 +106,18 @@ static int project(scalar *x, uint n, scalar *b, struct laplacian *L, scalar *W = P + n * (miter + 1); uint i; - for (i = 0; i < n; i++) - x[i] = 0, r[i] = b[i]; + for (i = 0; i < n; i++) x[i] = 0, r[i] = b[i]; scalar rr = dot(r, r, n); comm_allreduce(c, gs_double, gs_add, &rr, 1, buf); scalar rtol = rr * tol * tol; - for (i = 0; i < n; i++) - z[i] = r[i]; - if (null_space) - ortho(z, n, ng, c); + for (i = 0; i < n; i++) z[i] = r[i]; + if (null_space) ortho(z, n, ng, c); scalar rz1 = dot(z, z, n); comm_allreduce(c, gs_double, gs_add, &rz1, 1, buf); - for (i = 0; i < n; i++) - p[i] = z[i]; + for (i = 0; i < n; i++) p[i] = z[i]; scalar alpha, beta, rzt, rz2; @@ -149,33 +133,27 @@ static int project(scalar *x, uint n, scalar *b, struct laplacian *L, alpha = rz1 / pw; pw = 1 / sqrt(pw); - for (j = 0; j < n; j++) - W[i * n + j] = pw * w[j], P[i * n + j] = pw * p[j]; + for (j = 0; j < n; j++) W[i * n + j] = pw * w[j], P[i * n + j] = pw * p[j]; - for (j = 0; j < n; j++) - x[j] += alpha * p[j], r[j] -= alpha * w[j]; + for (j = 0; j < n; j++) x[j] += alpha * p[j], r[j] -= alpha * w[j]; rr = dot(r, r, n); comm_allreduce(c, gs_double, gs_add, &rr, 1, buf); - if (rr < rtol || sqrt(rr) < tol) - break; + if (rr < rtol || sqrt(rr) < tol) break; - for (j = 0; j < n; j++) - z0[j] = z[j]; + for (j = 0; j < n; j++) z0[j] = z[j]; metric_tic(c, RSB_PROJECT_MG); mg_vcycle(z, r, d, c, bfr); metric_toc(c, RSB_PROJECT_MG); rzt = rz1; - if (null_space) - ortho(z, n, ng, c); + if (null_space) ortho(z, n, ng, c); rz1 = dot(r, z, n); comm_allreduce(c, gs_double, gs_add, &rz1, 1, buf); - for (j = 0; j < n; j++) - dz[j] = z[j] - z0[j]; + for (j = 0; j < n; j++) dz[j] = z[j] - z0[j]; rz2 = dot(r, dz, n); comm_allreduce(c, gs_double, gs_add, &rz2, 1, buf); @@ -186,23 +164,18 @@ static int project(scalar *x, uint n, scalar *b, struct laplacian *L, } beta = rz2 / rzt; - for (j = 0; j < n; j++) - p[j] = z[j] + beta * p[j]; + for (j = 0; j < n; j++) p[j] = z[j] + beta * p[j]; - for (k = 0; k < n; k++) - P[miter * n + k] = 0; + for (k = 0; k < n; k++) P[miter * n + k] = 0; for (j = 0; j <= i; j++) { pw = 0; - for (k = 0; k < n; k++) - pw += W[j * n + k] * p[k]; + for (k = 0; k < n; k++) pw += W[j * n + k] * p[k]; comm_allreduce(c, gs_double, gs_add, &pw, 1, buf); - for (k = 0; k < n; k++) - P[miter * n + k] += pw * P[j * n + k]; + for (k = 0; k < n; k++) P[miter * n + k] += pw * P[j * n + k]; } - for (k = 0; k < n; k++) - p[k] -= P[miter * n + k]; + for (k = 0; k < n; k++) p[k] -= P[miter * n + k]; } free(z); @@ -234,8 +207,7 @@ static int inverse(scalar *y, struct array *elements, unsigned nv, scalar *z, uint i, j, k, l; for (i = k = 0; i < lelt; i++) { eid[i] = start + i + 1; - for (j = 0; j < nv; j++) - vtx[k++] = elems[i].vertices[j]; + for (j = 0; j < nv; j++) vtx[k++] = elems[i].vertices[j]; } // Setup LAMG preconditioner @@ -261,8 +233,7 @@ static int inverse(scalar *y, struct array *elements, unsigned nv, scalar *z, scalar lambda = dot(y, z, lelt); comm_allreduce(gsc, gs_double, gs_add, &lambda, 1, bfr); - for (uint j = 0; j < lelt; j++) - err[j] = y[j] - lambda * z[j]; + for (uint j = 0; j < lelt; j++) err[j] = y[j] - lambda * z[j]; scalar norme = dot(err, err, lelt); comm_allreduce(gsc, gs_double, gs_add, &norme, 1, bfr); norme = sqrt(norme); @@ -271,8 +242,7 @@ static int inverse(scalar *y, struct array *elements, unsigned nv, scalar *z, comm_allreduce(gsc, gs_double, gs_add, &norm, 1, bfr); scalar normi = 1.0 / sqrt(norm); - for (j = 0; j < lelt; j++) - z[j] = y[j] * normi; + for (j = 0; j < lelt; j++) z[j] = y[j] * normi; ortho(z, lelt, nelg, gsc); @@ -286,15 +256,13 @@ static int inverse(scalar *y, struct array *elements, unsigned nv, scalar *z, // rhs = Z[1:k-1,:]*z for (j = 0; j < i; j++) { rhs[j] = 0.0; - for (l = 0; l < lelt; l++) - rhs[j] += Z[j * lelt + l] * z[l]; + for (l = 0; l < lelt; l++) rhs[j] += Z[j * lelt + l] * z[l]; } // Global reduction rhs[j] comm_allreduce(gsc, gs_double, gs_add, rhs, i, bfr); // Z[k,:] = z[:] - Z[:,1:lelt]*rhs[:] - for (l = 0; l < lelt; l++) - Z[i * lelt + l] = z[l]; + for (l = 0; l < lelt; l++) Z[i * lelt + l] = z[l]; for (j = 0; j < i; j++) { for (l = 0; l < lelt; l++) @@ -303,22 +271,19 @@ static int inverse(scalar *y, struct array *elements, unsigned nv, scalar *z, // Z[k,:]= Z[k,:]/||Z[k,:]|| norm = 0.0; - for (l = 0; l < lelt; l++) - norm += Z[i * lelt + l] * Z[i * lelt + l]; + for (l = 0; l < lelt; l++) norm += Z[i * lelt + l] * Z[i * lelt + l]; comm_allreduce(gsc, gs_double, gs_add, &norm, 1, bfr); norm = 1.0 / sqrt(norm); - for (l = 0; l < lelt; l++) - Z[i * lelt + l] *= norm; + for (l = 0; l < lelt; l++) Z[i * lelt + l] *= norm; // M=Z(1:k,:)*G*Z(1:k,:); for (j = 0; j < N; j++) { laplacian(GZ, wl, &Z[j * lelt], buf); for (k = 0; k < N; k++) { M[k * N + j] = 0.0; - for (l = 0; l < lelt; l++) - M[k * N + j] += Z[k * lelt + l] * GZ[l]; + for (l = 0; l < lelt; l++) M[k * N + j] += Z[k * lelt + l] * GZ[l]; } } @@ -328,23 +293,19 @@ static int inverse(scalar *y, struct array *elements, unsigned nv, scalar *z, // Inverse power iterarion on M inv_power_serial(v, N, M, 0); - for (j = 0; j < lelt; j++) - z[j] = 0.0; + for (j = 0; j < lelt; j++) z[j] = 0.0; for (j = 0; j < N; j++) { - for (k = 0; k < lelt; k++) - z[k] += Z[j * lelt + k] * v[j]; + for (k = 0; k < lelt; k++) z[k] += Z[j * lelt + k] * v[j]; } ortho(z, lelt, nelg, gsc); } else { // Z(k,:) = z; - for (l = 0; l < lelt; l++) - Z[i * lelt + l] = z[l]; + for (l = 0; l < lelt; l++) Z[i * lelt + l] = z[l]; } } - if (ppfi == 1) - break; + if (ppfi == 1) break; } metric_toc(gsc, RSB_INVERSE); @@ -354,8 +315,7 @@ static int inverse(scalar *y, struct array *elements, unsigned nv, scalar *z, free(L); } mg_free(d); - if (err) - free(err); + if (err) free(err); return iters; } @@ -367,20 +327,16 @@ static double sign(scalar a, scalar b) { static int tqli(scalar *eVectors, scalar *eValues, sint n, scalar *diagonal, scalar *upper, int id) { - if (n == 0) - return 0; + if (n == 0) return 0; scalar *d = tcalloc(scalar, 2 * n), *e = d + n; sint i; - for (i = 0; i < n; i++) - d[i] = diagonal[i]; - for (i = 0; i < n - 1; i++) - e[i] = upper[i]; + for (i = 0; i < n; i++) d[i] = diagonal[i]; + for (i = 0; i < n - 1; i++) e[i] = upper[i]; e[n - 1] = 0.0; for (i = 0; i < n; i++) { - for (sint j = 0; j < n; j++) - eVectors[i * n + j] = 0; + for (sint j = 0; j < n; j++) eVectors[i * n + j] = 0; eVectors[i * n + i] = 1; } @@ -391,17 +347,14 @@ static int tqli(scalar *eVectors, scalar *eValues, sint n, scalar *diagonal, for (m = l; m < n - 1; m++) { scalar dd = fabs(d[m]) + fabs(d[m + 1]); /* Should use a tolerance for this check */ - if (fabs(e[m]) / dd < SCALAR_TOL) - break; + if (fabs(e[m]) / dd < SCALAR_TOL) break; } if (m != l) { if (iter++ == 30) { - if (id == 0) - printf("Too many iterations.\n"); + if (id == 0) printf("Too many iterations.\n"); // vec_copy(*eValues, d); - for (i = 0; i < n; i++) - eValues[i] = d[i]; + for (i = 0; i < n; i++) eValues[i] = d[i]; return 1; } @@ -443,8 +396,7 @@ static int tqli(scalar *eVectors, scalar *eValues, sint n, scalar *diagonal, /* Done with eigenvectors */ } - if (r < SCALAR_TOL && i >= l) - continue; + if (r < SCALAR_TOL && i >= l) continue; d[l] -= p; e[l] = g; @@ -466,16 +418,13 @@ static int tqli(scalar *eVectors, scalar *eValues, sint n, scalar *diagonal, e[k] = 0; for (sint i = 0; i < n; i++) e[k] += eVectors[k * n + i] * eVectors[k * n + i]; - if (e[k] > 0.0) - e[k] = sqrt(fabs(e[k])); + if (e[k] > 0.0) e[k] = sqrt(fabs(e[k])); scalar scale = 1.0 / e[k]; - for (sint i = 0; i < n; i++) - eVectors[k * n + i] *= scale; + for (sint i = 0; i < n; i++) eVectors[k * n + i] *= scale; } // vec_copy(*eValues, d); - for (i = 0; i < n; i++) - eValues[i] = d[i]; + for (i = 0; i < n; i++) eValues[i] = d[i]; free(d); @@ -488,8 +437,7 @@ static int lanczos_aux(scalar *diag, scalar *upper, scalar *rr, uint lelt, scalar *r = tcalloc(scalar, 3 * lelt), *p = r + lelt, *w = p + lelt; // vec_copy(r, f); uint i; - for (i = 0; i < lelt; i++) - r[i] = f[i]; + for (i = 0; i < lelt; i++) r[i] = f[i]; // vec_ortho(gsc, r, nelg); ortho(r, lelt, nelg, gsc); @@ -504,19 +452,16 @@ static int lanczos_aux(scalar *diag, scalar *upper, scalar *rr, uint lelt, // vec_scale(rr[0], r, rni); scalar rni = 1.0 / rnorm; - for (i = 0; i < lelt; i++) - rr[0 * lelt + i] = r[i] * rni; + for (i = 0; i < lelt; i++) rr[0 * lelt + i] = r[i] * rni; int iter; for (iter = 0; iter < niter; iter++) { rtz2 = rtz1, rtz1 = rtr; beta = rtz1 / rtz2; - if (iter == 0) - beta = 0.0; + if (iter == 0) beta = 0.0; // add2s1(p,r,beta,n) - for (i = 0; i < lelt; i++) - p[i] = beta * p[i] + r[i]; + for (i = 0; i < lelt; i++) p[i] = beta * p[i] + r[i]; scalar pp = dot(p, p, lelt); comm_allreduce(gsc, gs_double, gs_add, &pp, 1, buf); @@ -536,16 +481,14 @@ static int lanczos_aux(scalar *diag, scalar *upper, scalar *rr, uint lelt, alpha = rtz1 / pap; // vec_axpby(r, r, 1.0, w, -1.0 * alpha); - for (i = 0; i < lelt; i++) - r[i] = r[i] - alpha * w[i]; + for (i = 0; i < lelt; i++) r[i] = r[i] - alpha * w[i]; rtr = dot(r, r, lelt); comm_allreduce(gsc, gs_double, gs_add, &rtr, 1, buf); rnorm = sqrt(rtr), rni = 1.0 / rnorm; // vec_scale(rr[iter + 1], r, rni); - for (i = 0; i < lelt; i++) - rr[(iter + 1) * lelt + i] = r[i] * rni; + for (i = 0; i < lelt; i++) rr[(iter + 1) * lelt + i] = r[i] * rni; if (iter == 0) { diag[iter] = pap / rtz1; @@ -576,8 +519,7 @@ static int lanczos(scalar *fiedler, struct array *elements, unsigned nv, struct laplacian *wl = laplacian_init(elems, lelt, nv, GS, gsc, bfr); metric_toc(gsc, RSB_LANCZOS_SETUP); - if (nelg < miter) - miter = nelg; + if (nelg < miter) miter = nelg; scalar *alpha = tcalloc(scalar, 2 * miter - 1), *beta = alpha + miter; scalar *rr = tcalloc(scalar, (miter + 1) * lelt); @@ -608,8 +550,7 @@ static int lanczos(scalar *fiedler, struct array *elements, unsigned nv, fiedler[i] += rr[j * lelt + i] * eVectors[eValMinI * iter + j]; } ortho(fiedler, lelt, nelg, gsc); - for (uint i = 0; i < lelt; i++) - initv[i] = fiedler[i]; + for (uint i = 0; i < lelt; i++) initv[i] = fiedler[i]; metric_acc(RSB_LANCZOS_TQLI, comm_time() - t); } @@ -622,8 +563,7 @@ static int lanczos(scalar *fiedler, struct array *elements, unsigned nv, int fiedler(struct array *elements, int nv, const parrsb_options *const opts, struct comm *gsc, buffer *buf, int verbose) { // Return if the number of processes is equal to 1. - if (gsc->np == 1) - return 0; + if (gsc->np == 1) return 0; metric_tic(gsc, RSB_FIEDLER_SETUP); uint lelt = elements->n; @@ -634,8 +574,7 @@ int fiedler(struct array *elements, int nv, const parrsb_options *const opts, scalar *initv = tcalloc(scalar, lelt); for (uint i = 0; i < lelt; i++) { initv[i] = start + i + 1.0; - if (start + i < nelg / 2) - initv[i] += 1000 * nelg; + if (start + i < nelg / 2) initv[i] += 1000 * nelg; } ortho(initv, lelt, nelg, gsc); @@ -643,8 +582,7 @@ int fiedler(struct array *elements, int nv, const parrsb_options *const opts, comm_allreduce(gsc, gs_double, gs_add, &rtr, 1, &rni); rni = 1.0 / sqrt(rtr); - for (uint i = 0; i < lelt; i++) - initv[i] *= rni; + for (uint i = 0; i < lelt; i++) initv[i] *= rni; metric_toc(gsc, RSB_FIEDLER_SETUP); metric_tic(gsc, RSB_FIEDLER_CALC); @@ -660,30 +598,24 @@ int fiedler(struct array *elements, int nv, const parrsb_options *const opts, opts->rsb_max_passes, opts->rsb_tol, opts->rsb_mg_factor, opts->rsb_mg_grammian, nelg, buf); break; - default: - break; + default: break; } metric_toc(gsc, RSB_FIEDLER_CALC); metric_acc(RSB_FIEDLER_CALC_NITER, iter); scalar norm = 0; - for (uint i = 0; i < lelt; i++) - norm += f[i] * f[i]; + for (uint i = 0; i < lelt; i++) norm += f[i] * f[i]; scalar normi; comm_allreduce(gsc, gs_double, gs_add, &norm, 1, &normi); normi = 1.0 / sqrt(norm); - for (uint i = 0; i < lelt; i++) - f[i] *= normi; + for (uint i = 0; i < lelt; i++) f[i] *= normi; struct rsb_element *elems = (struct rsb_element *)elements->ptr; - for (uint i = 0; i < lelt; i++) - elems[i].fiedler = f[i]; + for (uint i = 0; i < lelt; i++) elems[i].fiedler = f[i]; - if (initv) - free(initv); - if (f) - free(f); + if (initv) free(initv); + if (f) free(f); return 0; } diff --git a/src/helpers.c b/src/helpers.c index 46c282f3..7c94d516 100644 --- a/src/helpers.c +++ b/src/helpers.c @@ -22,13 +22,12 @@ void parrsb_print_stack(void) { free(symbols); } #else -void parrsb_print_stack() {} +void parrsb_print_stack(void) {} #endif // defined __GLIBC__ int log2ll(long long n) { int k = 0; - while (n > 1) - n /= 2, k++; + while (n > 1) n /= 2, k++; return k; } @@ -50,8 +49,7 @@ int parrsb_dist_mesh(unsigned int *nelt_, long long **vl_, double **coord_, uint e, n; for (e = 0; e < nelt; ++e) { data.proc = part[e]; - for (n = 0; n < nv; ++n) - data.vtx[n] = vl[e * nv + n]; + for (n = 0; n < nv; ++n) data.vtx[n] = vl[e * nv + n]; array_cat(elem_data, &elements, &data, 1); } assert(elements.n == nelt); @@ -61,8 +59,7 @@ int parrsb_dist_mesh(unsigned int *nelt_, long long **vl_, double **coord_, double *coord = (coord_ == NULL ? NULL : *coord_); if (coord != NULL) { for (e = 0; e < nelt; e++) - for (n = 0; n < ndim * nv; n++) - ed[e].coord[n] = coord[e * ndim * nv + n]; + for (n = 0; n < ndim * nv; n++) ed[e].coord[n] = coord[e * ndim * nv + n]; } struct comm c; @@ -77,15 +74,13 @@ int parrsb_dist_mesh(unsigned int *nelt_, long long **vl_, double **coord_, vl = *vl_ = (long long *)realloc(*vl_, nv * nelt * sizeof(long long)); for (e = 0; e < nelt; ++e) - for (n = 0; n < nv; ++n) - vl[e * nv + n] = ed[e].vtx[n]; + for (n = 0; n < nv; ++n) vl[e * nv + n] = ed[e].vtx[n]; if (coord != NULL) { coord = *coord_ = (double *)realloc(*coord_, ndim * nv * nelt * sizeof(double)); for (e = 0; e < nelt; ++e) { - for (n = 0; n < ndim * nv; ++n) - coord[e * ndim * nv + n] = ed[e].coord[n]; + for (n = 0; n < ndim * nv; ++n) coord[e * ndim * nv + n] = ed[e].coord[n]; } } @@ -140,13 +135,11 @@ void parrsb_get_part_stat(int *nc, int *ns, int *nss, int *nel, long long *vtx, comm_init(&comm, ce); uint np = comm.np; - if (np == 1) - return; + if (np == 1) return; size_t Npts = nelt * nv; slong *data = (slong *)malloc((Npts + 1) * sizeof(slong)); - for (size_t i = 0; i < Npts; i++) - data[i] = vtx[i]; + for (size_t i = 0; i < Npts; i++) data[i] = vtx[i]; struct gs_data *gsh = gs_setup(data, Npts, &comm, 0, gs_pairwise, 0); int Nmsg; @@ -251,7 +244,8 @@ void parrsb_print_part_stat(long long *vtx, unsigned nelt, unsigned nv, } } -static void print_help() {} +// TODO: Print options supported by parRSB. +static void print_help(void) {} parrsb_cmd_line_opts *parrsb_parse_cmd_opts(int argc, char *argv[]) { parrsb_cmd_line_opts *in = tcalloc(parrsb_cmd_line_opts, 1); @@ -271,8 +265,7 @@ parrsb_cmd_line_opts *parrsb_parse_cmd_opts(int argc, char *argv[]) { size_t len; for (;;) { int c = getopt_long(argc, argv, "", long_options, NULL); - if (c == -1) - break; + if (c == -1) break; switch (c) { case 0: @@ -280,26 +273,13 @@ parrsb_cmd_line_opts *parrsb_parse_cmd_opts(int argc, char *argv[]) { in->mesh = tcalloc(char, len + 1); strncpy(in->mesh, optarg, len); break; - case 10: - in->tol = atof(optarg); - break; - case 20: - in->test = 1; - break; - case 30: - in->dump = 1; - break; - case 40: - in->nactive = atoi(optarg); - break; - case 50: - in->verbose = atoi(optarg); - break; - case 99: - print_help(); - break; - default: - exit(EXIT_FAILURE); + case 10: in->tol = atof(optarg); break; + case 20: in->test = 1; break; + case 30: in->dump = 1; break; + case 40: in->nactive = atoi(optarg); break; + case 50: in->verbose = atoi(optarg); break; + case 99: print_help(); break; + default: exit(EXIT_FAILURE); } } @@ -313,8 +293,7 @@ parrsb_cmd_line_opts *parrsb_parse_cmd_opts(int argc, char *argv[]) { void parrsb_cmd_opts_free(parrsb_cmd_line_opts *opts) { if (opts) { - if (opts->mesh) - free(opts->mesh); + if (opts->mesh) free(opts->mesh); free(opts); } } @@ -374,8 +353,7 @@ int parrsb_vector_dump(const char *fname, scalar *y, struct rsb_element *elm, int ndim = (nv == 8) ? 3 : 2; uint write_size = ((ndim + 1) * sizeof(double) + sizeof(slong)) * nelt; - if (rank == 0) - write_size += sizeof(long) + sizeof(int); // for nelgt and ndim + if (rank == 0) write_size += sizeof(long) + sizeof(int); // for nelgt and ndim char *bfr, *bfr0; bfr = bfr0 = (char *)calloc(write_size, sizeof(char)); diff --git a/src/io.c b/src/io.c index 47dec216..2c0be38e 100644 --- a/src/io.c +++ b/src/io.c @@ -300,8 +300,7 @@ static int read_connectivity(unsigned int *nelt_, unsigned *nv_, size_t read_size = nelt * (nv + 1) * sizeof(int); size_t header_size = GC_CO2_HEADER_LEN + sizeof(float); - if (rank == 0) - read_size += header_size; + if (rank == 0) read_size += header_size; buf = (char *)realloc(buf, read_size * sizeof(char)); err = MPI_File_read_ordered(file, buf, read_size, MPI_BYTE, &st); @@ -388,8 +387,7 @@ int parrsb_dump_con(char *name, unsigned nelt, unsigned nv, long long *vl, int write_size = nelt * (nv + 1) * sizeof(int); int header_size = GC_CO2_HEADER_LEN + sizeof(float); - if (id == 0) - write_size += header_size; + if (id == 0) write_size += header_size; char *buf = (char *)calloc(write_size, sizeof(char)); char *buf0 = buf; @@ -471,8 +469,7 @@ int parrsb_dump_map(char *name, unsigned nelt, unsigned nv, long long *vtx, } int writeSize = 0; - if (rank == 0) - writeSize = HEADER_LEN * sizeof(char) + sizeof(float); + if (rank == 0) writeSize = HEADER_LEN * sizeof(char) + sizeof(float); writeSize += (nv + 1) * nelt * sizeof(int); char *buf = (char *)calloc(writeSize, sizeof(char)); @@ -489,8 +486,7 @@ int parrsb_dump_map(char *name, unsigned nelt, unsigned nv, long long *vtx, memcpy(buf0, &rank, sizeof(int)); buf0 += sizeof(int); - for (unsigned j = 0; j < nv; j++) - ivtx[j] = vtx[i * nv + j]; + for (unsigned j = 0; j < nv; j++) ivtx[j] = vtx[i * nv + j]; memcpy(buf0, ivtx, sizeof(int) * nv); buf0 += nv * sizeof(int); } diff --git a/src/laplacian.c b/src/laplacian.c index 34067751..60af697c 100644 --- a/src/laplacian.c +++ b/src/laplacian.c @@ -50,8 +50,7 @@ static void find_nbrs_rsb(struct array *arr, const struct rsb_element *elems, array_init(struct nbr, arr, vertices.n * 10); while (s < vn) { e = s + 1; - while (e < vn && vptr[s].c == vptr[e].c) - e++; + while (e < vn && vptr[s].c == vptr[e].c) e++; for (i = s; i < e; i++) { t = vptr[i]; for (j = s; j < e; j++) { @@ -165,14 +164,12 @@ static int gs_weighted_init(struct laplacian *l, struct rsb_element *elems, slong *vertices = tcalloc(slong, npts); uint i, j; for (i = 0; i < lelt; i++) - for (j = 0; j < nv; j++) - vertices[i * nv + j] = elems[i].vertices[j]; + for (j = 0; j < nv; j++) vertices[i * nv + j] = elems[i].vertices[j]; struct gs_laplacian *gl = l->data = tcalloc(struct gs_laplacian, 1); gl->u = tcalloc(scalar, npts); for (i = 0; i < lelt; i++) - for (j = 0; j < nv; j++) - gl->u[nv * i + j] = 1.0; + for (j = 0; j < nv; j++) gl->u[nv * i + j] = 1.0; gl->gsh = gs_setup(vertices, npts, c, 0, gs_crystal_router, 0); gs(gl->u, gs_double, gs_add, 0, gl->gsh, buf); @@ -180,12 +177,10 @@ static int gs_weighted_init(struct laplacian *l, struct rsb_element *elems, gl->diag = tcalloc(scalar, lelt); for (i = 0; i < lelt; i++) { gl->diag[i] = 0.0; - for (j = 0; j < nv; j++) - gl->diag[i] += gl->u[nv * i + j]; + for (j = 0; j < nv; j++) gl->diag[i] += gl->u[nv * i + j]; } - if (vertices != NULL) - free(vertices); + if (vertices != NULL) free(vertices); return 0; } @@ -197,15 +192,13 @@ static int gs_weighted(scalar *v, struct laplacian *l, scalar *u, buffer *bfr) { uint i, j; for (i = 0; i < lelt; i++) - for (j = 0; j < nv; j++) - gl->u[nv * i + j] = u[i]; + for (j = 0; j < nv; j++) gl->u[nv * i + j] = u[i]; gs(gl->u, gs_double, gs_add, 0, gl->gsh, bfr); for (i = 0; i < lelt; i++) { v[i] = gl->diag[i] * u[i]; - for (j = 0; j < nv; j++) - v[i] -= gl->u[nv * i + j]; + for (j = 0; j < nv; j++) v[i] -= gl->u[nv * i + j]; } return 0; @@ -213,10 +206,8 @@ static int gs_weighted(scalar *v, struct laplacian *l, scalar *u, buffer *bfr) { static int gs_weighted_free(struct laplacian *l) { struct gs_laplacian *gl = l->data; - if (gl->u != NULL) - free(gl->u); - if (gl->diag != NULL) - free(gl->diag); + if (gl->u != NULL) free(gl->u); + if (gl->diag != NULL) free(gl->diag); gs_free(gl->gsh); free(l->data); return 0; @@ -261,9 +252,7 @@ void laplacian_free(struct laplacian *l) { if (l) { if (l->type & CSR) par_csr_free(l); - else if (l->type & GS) { - gs_weighted_free(l); - } + else if (l->type & GS) { gs_weighted_free(l); } free(l); } } diff --git a/src/mat.c b/src/mat.c index 37bd401c..a8491bef 100644 --- a/src/mat.c +++ b/src/mat.c @@ -5,8 +5,7 @@ #define FREE(ptr, x) \ { \ - if (ptr->x != NULL) \ - free(ptr->x); \ + if (ptr->x != NULL) free(ptr->x); \ } //------------------------------------------------------------------------------ @@ -17,8 +16,7 @@ // to ensure all the int compress_nbrs(struct array *eij, struct array *nbr, buffer *bfr) { array_init(struct mij, eij, nbr->n); - if (nbr->n == 0) - return 1; + if (nbr->n == 0) return 1; sarray_sort_2(struct nbr, nbr->ptr, nbr->n, r, 1, c, 1, bfr); @@ -31,8 +29,7 @@ int compress_nbrs(struct array *eij, struct array *nbr, buffer *bfr) { m.r = ptr[i].r, m.c = ptr[i].c; uint j = i + 1; - while (j < nbr->n && ptr[j].r == ptr[i].r && ptr[j].c == ptr[i].c) - j++; + while (j < nbr->n && ptr[j].r == ptr[i].r && ptr[j].c == ptr[i].c) j++; m.v = j - i, m.v = -m.v; array_cat(struct mij, eij, &m, 1); @@ -112,8 +109,7 @@ int csr_setup(struct mat *mat, struct array *entries, int sep, buffer *buf) { uint i, j; for (nr = 1, i = 1, j = 0; i < nnz; i++) { if ((unique[j].r != ptr[i].r) || (unique[j].c != ptr[i].c)) { - if (unique[j].r != ptr[i].r) - Lp[nr] = j + 1 - sep * nr, nr++; + if (unique[j].r != ptr[i].r) Lp[nr] = j + 1 - sep * nr, nr++; unique[++j] = ptr[i]; } else unique[j].v += ptr[i].v; @@ -235,8 +231,7 @@ void find_nbrs(struct array *arr, const ulong *eid, const slong *vtx, uint s = 0, e; while (s < vertices.n) { e = s + 1; - while (e < vertices.n && pv[s].c == pv[e].c) - e++; + while (e < vertices.n && pv[s].c == pv[e].c) e++; for (uint i = s; i < e; i++) { t = pv[i]; for (uint j = s; j < e; j++) { @@ -538,19 +533,15 @@ void par_csr_to_csc(struct par_mat *N, const struct par_mat *M, int diag, assert(IS_CSR(M)); slong *cols = tcalloc(slong, M->cn + M->rn); - for (uint i = 0; i < M->cn; i++) - cols[i] = -M->cols[i]; - for (uint i = 0; i < M->rn; i++) - cols[M->cn + i] = M->rows[i]; + for (uint i = 0; i < M->cn; i++) cols[i] = -M->cols[i]; + for (uint i = 0; i < M->rn; i++) cols[M->cn + i] = M->rows[i]; struct comm *c = &cr->comm; struct gs_data *gsh = gs_setup(cols, M->cn + M->rn, c, 0, gs_pairwise, 0); sint *owner = (sint *)cols; - for (uint i = 0; i < M->cn; i++) - owner[i] = -1; - for (uint i = 0; i < M->rn; i++) - owner[M->cn + i] = c->id; + for (uint i = 0; i < M->cn; i++) owner[i] = -1; + for (uint i = 0; i < M->rn; i++) owner[M->cn + i] = c->id; gs(owner, gs_int, gs_max, 0, gsh, bfr); gs_free(gsh); @@ -589,19 +580,15 @@ void par_csc_to_csr(struct par_mat *N, const struct par_mat *M, int diag, assert(IS_CSC(M) && !IS_DIAG(M)); slong *rows = tcalloc(slong, M->rn + M->cn); - for (uint i = 0; i < M->rn; i++) - rows[i] = -M->rows[i]; - for (uint i = 0; i < M->cn; i++) - rows[M->rn + i] = M->cols[i]; + for (uint i = 0; i < M->rn; i++) rows[i] = -M->rows[i]; + for (uint i = 0; i < M->cn; i++) rows[M->rn + i] = M->cols[i]; struct comm *c = &cr->comm; struct gs_data *gsh = gs_setup(rows, M->rn + M->cn, c, 0, gs_pairwise, 0); sint *owner = (sint *)rows; - for (uint i = 0; i < M->rn; i++) - owner[i] = -1; - for (uint i = 0; i < M->cn; i++) - owner[M->rn + i] = c->id; + for (uint i = 0; i < M->rn; i++) owner[i] = -1; + for (uint i = 0; i < M->cn; i++) owner[M->rn + i] = c->id; gs(owner, gs_int, gs_max, 0, gsh, bfr); gs_free(gsh); @@ -716,8 +703,7 @@ int par_mat_free(struct par_mat *A) { // static int compress_mij(struct array *eij, struct array *entries, buffer *bfr) { eij->n = 0; - if (entries->n == 0) - return 1; + if (entries->n == 0) return 1; sarray_sort_2(struct mij, entries->ptr, entries->n, r, 1, c, 1, bfr); @@ -835,8 +821,7 @@ struct gs_data *setup_Q(const struct par_mat *M, const struct comm *c, for (i = 0; i < n; i++) for (j = M->adj_off[i]; j < M->adj_off[i + 1]; j++) sids[j] = -ids[M->adj_idx[j]]; - for (i = 0; i < n; i++) - sids[j++] = diag[i]; + for (i = 0; i < n; i++) sids[j++] = diag[i]; return gs_setup(sids, nnz, c, 0, gs_crystal_router, 0); } @@ -848,10 +833,8 @@ void mat_vec_csr(scalar *y, const scalar *x, const struct par_mat *M, uint n = M->rn, *Lp = M->adj_off, nnz = n > 0 ? Lp[n] : 0; uint i, j, je; - for (i = 0; i < nnz; i++) - buf[i] = 0.0; // Is this really necessary? - for (i = 0, j = nnz; i < n; i++, j++) - y[i] = buf[j] = x[i]; + for (i = 0; i < nnz; i++) buf[i] = 0.0; // Is this really necessary? + for (i = 0, j = nnz; i < n; i++, j++) y[i] = buf[j] = x[i]; gs(buf, gs_double, gs_add, 0, gsh, bfr); @@ -867,8 +850,7 @@ void mat_vec_csr(scalar *y, const scalar *x, const struct par_mat *M, void par_arr_dump(const char *name, struct array *arr, struct crystal *cr, buffer *bfr) { struct mij *ptr = arr->ptr; - for (uint i = 0; i < arr->n; i++) - ptr[i].p = 0; + for (uint i = 0; i < arr->n; i++) ptr[i].p = 0; sarray_transfer(struct mij, arr, p, 0, cr); sarray_sort_2(struct mij, arr->ptr, arr->n, r, 1, c, 1, bfr); diff --git a/src/metrics.c b/src/metrics.c index ac3f2df6..b49a900f 100644 --- a/src/metrics.c +++ b/src/metrics.c @@ -11,9 +11,8 @@ static double metrics[MAXMETS]; static double *stack; static uint stack_size; -void metric_init() { - for (uint i = 0; i < MAXMETS; i++) - metrics[i] = 0.0; +void metric_init(void) { + for (uint i = 0; i < MAXMETS; i++) metrics[i] = 0.0; stack = tcalloc(double, MAXSIZE); stack_size = 0; } @@ -33,14 +32,12 @@ void metric_toc(struct comm *c, metric m) { } double metric_get_value(int level, metric m) { - if (level < 0) - return metrics[m]; - if ((uint)level < stack_size) - return stack[level * MAXMETS + m]; + if (level < 0) return metrics[m]; + if ((uint)level < stack_size) return stack[level * MAXMETS + m]; return 0.0; } -void metric_push_level() { +void metric_push_level(void) { assert(stack_size < MAXLVLS && "stack_size >= MAXLVLS"); for (unsigned i = 0; i < MAXMETS; i++) { @@ -50,22 +47,19 @@ void metric_push_level() { stack_size++; } -uint metric_get_levels() { return stack_size; } +uint metric_get_levels(void) { return stack_size; } static void metric_print_aux(double *wrk, struct comm *c) { double *min = wrk, *max = min + MAXSIZE, *sum = max + MAXSIZE; double *buf = sum + MAXSIZE; uint max_size = stack_size * MAXMETS; - for (uint i = 0; i < max_size; i++) { - min[i] = max[i] = sum[i] = stack[i]; - } + for (uint i = 0; i < max_size; i++) { min[i] = max[i] = sum[i] = stack[i]; } comm_allreduce(c, gs_double, gs_min, min, MAXSIZE, buf); // min comm_allreduce(c, gs_double, gs_max, max, MAXSIZE, buf); // max comm_allreduce(c, gs_double, gs_add, sum, MAXSIZE, buf); // sum - for (uint i = 0; i < max_size; i++) - sum[i] /= c->np; + for (uint i = 0; i < max_size; i++) sum[i] /= c->np; } #define SUMMARY(i, m) \ @@ -108,8 +102,7 @@ void metric_rsb_print(struct comm *c, int profile_level) { fflush(stdout); } - if (wrk) - free(wrk); + if (wrk) free(wrk); } void metric_crs_print(struct comm *c, int profile_level) { @@ -148,15 +141,13 @@ void metric_crs_print(struct comm *c, int profile_level) { fflush(stdout); } - if (wrk) - free(wrk); + if (wrk) free(wrk); } #undef SUMMARY -void metric_finalize() { - if (stack != NULL) - free(stack), stack = NULL; +void metric_finalize(void) { + if (stack != NULL) free(stack), stack = NULL; } #undef MAXMETS diff --git a/src/metrics.h b/src/metrics.h index 83d0bc3e..204c8cc8 100644 --- a/src/metrics.h +++ b/src/metrics.h @@ -42,16 +42,16 @@ typedef enum { TOL_INIT } metric; -void metric_init(); +void metric_init(void); void metric_acc(metric m, double val); void metric_set(metric m, double val); void metric_tic(struct comm *c, metric m); void metric_toc(struct comm *c, metric m); double metric_get_value(int level, metric m); -void metric_push_level(); -uint metric_get_levels(); +void metric_push_level(void); +uint metric_get_levels(void); void metric_rsb_print(struct comm *c, int profile_level); void metric_crs_print(struct comm *c, int profile_level); -void metric_finalize(); +void metric_finalize(void); #endif diff --git a/src/multigrid.c b/src/multigrid.c index b6e859a0..0c8ad144 100644 --- a/src/multigrid.c +++ b/src/multigrid.c @@ -81,8 +81,7 @@ static int sparse_gemm(struct par_mat *WG, const struct par_mat *W, sarray_sort_2(struct mij, gij.ptr, gij.n, c, 1, r, 1, bfr); struct mij *pg = (struct mij *)gij.ptr; - for (i = 0; i < gij.n; i++) - pg[i].idx = i; + for (i = 0; i < gij.n; i++) pg[i].idx = i; for (uint p = 0; p < cr->comm.np; p++) { // Calculate dot product of each row of W with columns of G @@ -93,22 +92,18 @@ static int sparse_gemm(struct par_mat *WG, const struct par_mat *W, m.c = pg[s].c, m.v = 0; for (j = W->adj_off[i], je = W->adj_off[i + 1]; j < je; j++) { ulong k = W->cols[W->adj_idx[j]]; - while (e < gij.n && pg[s].c == pg[e].c && pg[e].r < k) - e++; + while (e < gij.n && pg[s].c == pg[e].c && pg[e].r < k) e++; if (e < gij.n && pg[s].c == pg[e].c && pg[e].r == k) m.v += W->adj_val[j] * pg[e].v; } - while (e < gij.n && pg[s].c == pg[e].c) - e++; - if (fabs(m.v) > 1e-12) - array_cat(struct mij, &sij, &m, 1); + while (e < gij.n && pg[s].c == pg[e].c) e++; + if (fabs(m.v) > 1e-12) array_cat(struct mij, &sij, &m, 1); s = e; } } sint next = (cr->comm.id + 1) % cr->comm.np; - for (i = 0; i < gij.n; i++) - pg[i].p = next; + for (i = 0; i < gij.n; i++) pg[i].p = next; sarray_transfer(struct mij, &gij, p, 0, cr); sarray_sort(struct mij, gij.ptr, gij.n, idx, 0, bfr); @@ -178,8 +173,7 @@ static uint mg_setup_aux(struct mg *d, const int factor, struct crystal *cr, // Setup gs ids for coarse level (rhs interpolation ) ids = (slong *)trealloc(slong, ids, k + M->rn); - for (i = 0; i < M->rn; i++) - ids[k++] = M->rows[i]; + for (i = 0; i < M->rn; i++) ids[k++] = M->rows[i]; d->levels[lvl - 1]->J = gs_setup(ids, k, c, 0, gs_pairwise, 0); free(ids); @@ -248,15 +242,12 @@ struct mg *mg_setup(const struct par_mat *M, const int factor, // void mg_vcycle(scalar *u1, scalar *rhs, struct mg *d, struct comm *c, buffer *bfr) { - if (d->nlevels == 0) - return; + if (d->nlevels == 0) return; uint *lvl_off = d->level_off, nnz = lvl_off[d->nlevels]; scalar *r = d->buf; - for (uint i = 0; i < 4 * nnz; i++) - r[i] = 0; - for (uint i = 0; i < lvl_off[1]; i++) - r[i] = rhs[i]; + for (uint i = 0; i < 4 * nnz; i++) r[i] = 0; + for (uint i = 0; i < lvl_off[1]; i++) r[i] = rhs[i]; scalar *s = r + nnz, *Gs = s + nnz, *u = Gs + nnz, *wrk = u + nnz; @@ -270,15 +261,13 @@ void mg_vcycle(scalar *u1, scalar *rhs, struct mg *d, struct comm *c, // u = sigma * inv(D) * rhs scalar sigma = sigma_cheb(1, l->npres + 1, 1, 2); - for (j = 0; j < n; j++) - u[off + j] = sigma * r[off + j] / M->diag_val[j]; + for (j = 0; j < n; j++) u[off + j] = sigma * r[off + j] / M->diag_val[j]; // G*u mat_vec_csr(Gs + off, u + off, M, l->Q, wrk, bfr); // r = rhs - Gu - for (j = 0; j < n; j++) - r[off + j] = r[off + j] - Gs[off + j]; + for (j = 0; j < n; j++) r[off + j] = r[off + j] - Gs[off + j]; for (i = 1; i <= l->npres - 1; i++) { sigma = sigma_cheb(i + 1, l->npres + 1, 1, 2); @@ -292,8 +281,7 @@ void mg_vcycle(scalar *u1, scalar *rhs, struct mg *d, struct comm *c, // r = r - Gs mat_vec_csr(Gs + off, s + off, M, l->Q, wrk, bfr); - for (j = 0; j < n; j++) - r[off + j] = r[off + j] - Gs[off + j]; + for (j = 0; j < n; j++) r[off + j] = r[off + j] - Gs[off + j]; } // Interpolate to coarser level @@ -321,35 +309,26 @@ void mg_vcycle(scalar *u1, scalar *rhs, struct mg *d, struct comm *c, // u = u + over*S*J*e n = lvl_off[lvl + 1] - off; - for (j = 0; j < n; j++) - r[off + j] = l->over * r[off + j] + u[off + j]; + for (j = 0; j < n; j++) r[off + j] = l->over * r[off + j] + u[off + j]; } // Avoid this - for (i = 0; i < lvl_off[1]; i++) - u1[i] = r[i]; + for (i = 0; i < lvl_off[1]; i++) u1[i] = r[i]; } void mg_free(struct mg *d) { if (d != NULL) { struct mg_lvl **l = d->levels; for (uint i = 0; i < d->nlevels; i++) { - if (i > 0 && l[i]->M != NULL) - par_mat_free(l[i]->M), free(l[i]->M); - if (l[i]->J != NULL) - gs_free(l[i]->J), l[i]->J = NULL; - if (l[i]->Q != NULL) - gs_free(l[i]->Q), l[i]->Q = NULL; - if (l[i] != NULL) - free(l[i]), l[i] = NULL; + if (i > 0 && l[i]->M != NULL) par_mat_free(l[i]->M), free(l[i]->M); + if (l[i]->J != NULL) gs_free(l[i]->J), l[i]->J = NULL; + if (l[i]->Q != NULL) gs_free(l[i]->Q), l[i]->Q = NULL; + if (l[i] != NULL) free(l[i]), l[i] = NULL; } - if (d->levels != NULL) - free(d->levels), d->levels = NULL; - if (d->level_off != NULL) - free(d->level_off), d->level_off = NULL; - if (d->buf != NULL) - free(d->buf), d->buf = NULL; + if (d->levels != NULL) free(d->levels), d->levels = NULL; + if (d->level_off != NULL) free(d->level_off), d->level_off = NULL; + if (d->buf != NULL) free(d->buf), d->buf = NULL; free(d); } } diff --git a/src/parrsb.c b/src/parrsb.c index d68dec00..2d7ac084 100644 --- a/src/parrsb.c +++ b/src/parrsb.c @@ -139,8 +139,7 @@ static size_t load_balance(struct array *elist, uint nel, int nv, for (int v = 0; v < nv; v++) for (int n = 0; n < ndim; n++) pe->coord[n] += xyz[e * ndim * nv + v * ndim + n]; - for (int n = 0; n < ndim; n++) - pe->coord[n] /= nv; + for (int n = 0; n < ndim; n++) pe->coord[n] /= nv; } array_cat_(unit_size, elist, pe, 1, __FILE__, __LINE__); @@ -149,8 +148,7 @@ static size_t load_balance(struct array *elist, uint nel, int nv, if (vtx != NULL) { // RSB struct rsb_element *pr = (struct rsb_element *)elist->ptr; for (uint e = 0; e < nel; e++) { - for (int v = 0; v < nv; v++) - pr[e].vertices[v] = vtx[e * nv + v]; + for (int v = 0; v < nv; v++) pr[e].vertices[v] = vtx[e * nv + v]; } } @@ -195,7 +193,7 @@ static void initialize_node_aux(struct comm *c, const struct comm *const gc) { #endif } -static void initialize_levels(struct comm *const comms, int *const levels_, +static void initialize_levels(struct comm *const comms, int *const levels_in, const struct comm *const c, const int verbose) { // Level 1 communicator is the global communicator. comm_dup(&comms[0], c); @@ -205,11 +203,11 @@ static void initialize_levels(struct comm *const comms, int *const levels_, // Find the number of nodes under the global communicator and number of MPI // ranks in the node level communicator. - uint nnodes, nranks_per_node; + uint num_nodes, nranks_per_node; { sint in = (nc.id == 0), wrk; comm_allreduce(c, gs_int, gs_add, &in, 1, &wrk); - nnodes = in; + num_nodes = in; nranks_per_node = nc.np; // Check invariant: nranks_per_node should be the same across all the nodes. @@ -221,48 +219,19 @@ static void initialize_levels(struct comm *const comms, int *const levels_, assert(nranks_per_node > 0); parrsb_print(c, verbose, "initialize_levels: num_nodes = %u, num_ranks_per_node = %u", - nnodes, nranks_per_node); + num_nodes, nranks_per_node); } - // Check if there are custom levels specified by the user. Size of the - // partition (in terms of number of nodes) in a given level must be a - // multiple of the partition size of the next level. - sint levels; - uint sizes[2] = {nnodes, 1}; - { - const uint size_max = sizeof(sizes) / sizeof(sizes[0]); - uint start = 1; - while (start < size_max && sizes[start] >= sizes[0]) - start++; - while (start < size_max && sizes[0] % sizes[start]) - ++start; - - uint level = 1; - for (; start < size_max; ++start, ++level) - sizes[level] = sizes[start]; - // Set the size of the last partition to 1 (since it is the node level - // partitioner). - sizes[level - 1] = 1; - - // Check assert: sizes should be strictly decreasing. - for (uint i = 1; i < level; i++) - assert(sizes[i - 1] > sizes[i]); - - levels = level; - } - - for (sint level = 1; level < levels - 1; ++level) { - comm_split(&comms[level - 1], - comms[level - 1].id / (sizes[level] * nranks_per_node), - comms[level - 1].id, &comms[level]); - } - levels = MIN(levels, *levels_); - if (levels > 1) - comm_dup(&comms[levels - 1], &nc); - *levels_ = levels; - parrsb_print(c, verbose, "initialize_levels: levels = %u", levels); + // Hardcode the maximum number of levels to two for now. + sint levels = 2; + uint sizes[2] = {num_nodes, 1}; + *levels_in = levels = MIN(levels, *levels_in); + + if (levels > 1) comm_dup(&comms[levels - 1], &nc); comm_free(&nc); + + parrsb_print(c, verbose, "initialize_levels: levels = %u", levels); } static void parrsb_part_mesh_v0(int *part, const long long *const vtx, @@ -279,8 +248,7 @@ static void parrsb_part_mesh_v0(int *part, const long long *const vtx, "parrsb_part_mesh_v0: Both vertices and coordinates can't be NULL"); MPI_Abort(c->c, EXIT_FAILURE); } - if (xyz == NULL) - options->rsb_pre = 0; + if (xyz == NULL) options->rsb_pre = 0; struct array elist; size_t esize = load_balance(&elist, nel, nv, xyz, vtx, verbose, cr, bfr); @@ -305,23 +273,15 @@ static void parrsb_part_mesh_v0(int *part, const long long *const vtx, if (elist.n > 0) { int ndim = (nv == 8) ? 3 : 2; switch (options->partitioner) { - case 0: - rsb(&elist, nv, options, comms, bfr); - break; - case 1: - rcb(&elist, esize, ndim, &ca, bfr); - break; - case 2: - rib(&elist, esize, ndim, &ca, bfr); - break; - default: - break; + case 0: rsb(&elist, nv, options, comms, bfr); break; + case 1: rcb(&elist, esize, ndim, &ca, bfr); break; + case 2: rib(&elist, esize, ndim, &ca, bfr); break; + default: break; } } comm_free(&ca); - for (uint l = 0; l < (uint)options->levels; l++) - comm_free(&comms[l]); + for (uint l = 0; l < (uint)options->levels; l++) comm_free(&comms[l]); parrsb_print(c, verbose, "parrsb_part_mesh_v0: restore original input"); restore_original(part, cr, &elist, esize, bfr); @@ -369,8 +329,7 @@ void parrsb_check_tagged_partitions(const long long *const eids, slong start = out[0][0]; slong *lids = tcalloc(slong, nel); - for (uint i = 0; i < nel; i++) - lids[i] = start + i; + for (uint i = 0; i < nel; i++) lids[i] = start + i; gse = gs_setup(lids, nel, c, 0, gs_pairwise, 0); free(lids); @@ -384,8 +343,7 @@ void parrsb_check_tagged_partitions(const long long *const eids, sint *mul = tcalloc(sint, size); { struct gs_data *gsl = gs_setup(vtx, size, &lc, 0, gs_pairwise, 0); - for (uint i = 0; i < size; i++) - mul[i] = 1; + for (uint i = 0; i < size; i++) mul[i] = 1; gs(mul, gs_int, gs_add, 0, gsl, &bfr); gs_free(gsl); } @@ -404,8 +362,7 @@ void parrsb_check_tagged_partitions(const long long *const eids, gs(lmin, gs_int, gs_min, 0, gse, &bfr); gs(lmax, gs_int, gs_max, 0, gse, &bfr); - for (uint e = 0; e < nel; e++) - assert(lmin[e] == lmax[e]); + for (uint e = 0; e < nel; e++) assert(lmin[e] == lmax[e]); } free(lmin), free(lmax); @@ -466,8 +423,7 @@ static void parrsb_part_mesh_v1(int *part, const long long *const vtx, if (unique.n > 0) { in = 1; for (uint i = 1; i < unique.n; i++) { - if (pu[i].tag > pu[i - 1].tag) - in++; + if (pu[i].tag > pu[i - 1].tag) in++; } } @@ -493,8 +449,7 @@ static void parrsb_part_mesh_v1(int *part, const long long *const vtx, if (unique.n > 0) { pu[0].tagn = start; for (uint i = 1; i < unique.n; i++) { - if (pu[i].tag > pu[i - 1].tag) - start++; + if (pu[i].tag > pu[i - 1].tag) start++; pu[i].tagn = start; } } @@ -511,8 +466,7 @@ static void parrsb_part_mesh_v1(int *part, const long long *const vtx, for (uint i = 0, s = 0; i < unique.n; i++) { uint e = s + 1; assert(pt[s].tag == pu[i].tag); - while (e < tags.n && pt[e].tag == pu[i].tag) - e++; + while (e < tags.n && pt[e].tag == pu[i].tag) e++; for (uint j = s; j < e; j++) pt[j].p = chunk_size * pu[i].tagn + pt[i].seq % chunk_size; s = e; @@ -600,8 +554,7 @@ static void parrsb_part_mesh_v1(int *part, const long long *const vtx, sarray_sort(struct element_t, elements.ptr, elements.n, seq, 0, bfr); const struct element_t *const pe = (const struct element_t *const)elements.ptr; - for (uint i = 0; i < nel; i++) - part[i] = pe[i].part; + for (uint i = 0; i < nel; i++) part[i] = pe[i].part; } array_free(&elements); @@ -616,8 +569,7 @@ static void update_frontier(sint *const target, sint *const hop, if (*target >= 0) { // Check invariant: *hop < INT_MAX assert(*hop < INT_MAX); - for (uint i = 0; i < nv; i++) - frontier[i] = *target; + for (uint i = 0; i < nv; i++) frontier[i] = *target; return; } @@ -652,12 +604,10 @@ static void update_frontier(sint *const target, sint *const hop, current_target = pd[i].target, current_count = 1; } } - if (current_count > final_count) - final_target = current_target; + if (current_count > final_count) final_target = current_target; // Update frontier, target and hop. - for (uint j = 0; j < nv; j++) - frontier[j] = final_target; + for (uint j = 0; j < nv; j++) frontier[j] = final_target; *target = final_target, *hop = hid + 1; } @@ -673,8 +623,7 @@ void parrsb_part_solid(int *part, const long long *const vtx2, parrsb_print(&c, 1, "Running greedy solid ... nel1 = %d nel2 = %d", nel1, nel2); - for (uint i = 0; i < nel2; i++) - part[i] = -1; + for (uint i = 0; i < nel2; i++) part[i] = -1; buffer bfr; buffer_init(&bfr, 1024); @@ -706,10 +655,8 @@ void parrsb_part_solid(int *part, const long long *const vtx2, struct gs_data *gsh = NULL; { slong *vtx = tcalloc(slong, size); - for (size_t i = 0; i < size1; i++) - vtx[i] = vtx1[i]; - for (size_t i = 0; i < size2; i++) - vtx[size1 + i] = vtx2[i]; + for (size_t i = 0; i < size1; i++) vtx[i] = vtx1[i]; + for (size_t i = 0; i < size2; i++) vtx[size1 + i] = vtx2[i]; gsh = gs_setup(vtx, size, &c, 0, gs_pairwise, 0); free(vtx); @@ -726,8 +673,7 @@ void parrsb_part_solid(int *part, const long long *const vtx2, sint *const component = tcalloc(sint, size); if (c.id + 1 == (uint)idmin) { - for (uint i = 0; i < nv; i++) - component[i] = 1; + for (uint i = 0; i < nv; i++) component[i] = 1; } slong marked0 = 0, marked1 = 1; @@ -738,11 +684,9 @@ void parrsb_part_solid(int *part, const long long *const vtx2, marked0 = marked1, marked1 = 0; for (uint i = 0; i < nel1 + nel2; i++) { sint v = 0; - for (uint j = 0; j < nv; j++) - v += component[i * nv + j]; + for (uint j = 0; j < nv; j++) v += component[i * nv + j]; if (v > 0) { - for (uint j = 0; j < nv; j++) - component[i * nv + j] = 1; + for (uint j = 0; j < nv; j++) component[i * nv + j] = 1; marked1 += 1; } } @@ -814,8 +758,7 @@ void parrsb_part_solid(int *part, const long long *const vtx2, // check for that later when we actually assign the elements to partitions. { sint id = c.id, hid = 0; - if (nrecv2 == nexp2) - id = -1, hid = INT_MAX; + if (nrecv2 == nexp2) id = -1, hid = INT_MAX; // Max id should be >= 0; sint wrk, idmax = id; @@ -823,14 +766,10 @@ void parrsb_part_solid(int *part, const long long *const vtx2, assert(idmax >= 0); // Initialize frontier, target, and hop. - for (uint i = 0; i < size1; i++) - frontier[i] = id; - for (uint i = size1; i < size; i++) - frontier[i] = -1; - for (uint i = 0; i < nel1; i++) - target[i] = id, hop[i] = hid; - for (uint i = nel1; i < nelt; i++) - target[i] = -1, hop[i] = INT_MAX; + for (uint i = 0; i < size1; i++) frontier[i] = id; + for (uint i = size1; i < size; i++) frontier[i] = -1; + for (uint i = 0; i < nel1; i++) target[i] = id, hop[i] = hid; + for (uint i = nel1; i < nelt; i++) target[i] = -1, hop[i] = INT_MAX; } // Then perform a BFS till we assign all the elements in the solid mesh with @@ -863,8 +802,7 @@ void parrsb_part_solid(int *part, const long long *const vtx2, { struct elem_t et = {.part = -1}; for (uint i = 0; i < nel2; i++) { - if (part[i] >= 0) - continue; + if (part[i] >= 0) continue; et.sequence = i, et.target = target[nel1 + i], et.hop = hop[nel1 + i]; array_cat(struct elem_t, &arr, &et, 1); } @@ -880,8 +818,7 @@ void parrsb_part_solid(int *part, const long long *const vtx2, sarray_sort(struct elem_t, arr.ptr, arr.n, hop, 1, &bfr); struct elem_t *const pa = (struct elem_t *const)arr.ptr; uint keep = MIN(nexp2 - nrecv2, arr.n); - for (uint i = 0; i < keep; i++) - pa[i].part = c.id; + for (uint i = 0; i < keep; i++) pa[i].part = c.id; nrecv2 += keep; // Check for invariant: nrecv2 <= nexp2. assert(nrecv2 <= nexp2); @@ -893,8 +830,7 @@ void parrsb_part_solid(int *part, const long long *const vtx2, sarray_transfer(struct elem_t, &arr, target, 0, &cr); const struct elem_t *const pa = (const struct elem_t *const)arr.ptr; - for (uint j = 0; j < arr.n; j++) - part[pa[j].sequence] = pa[j].part; + for (uint j = 0; j < arr.n; j++) part[pa[j].sequence] = pa[j].part; arr.n = 0; } diff --git a/src/rcb.c b/src/rcb.c index a695ab5f..4c530289 100644 --- a/src/rcb.c +++ b/src/rcb.c @@ -12,24 +12,18 @@ static void get_axis_len(double *length, size_t unit_size, char *elems, struct rcb_element *ei; for (uint i = 0; i < nel; i++) { ei = (struct rcb_element *)(elems + i * unit_size); - if (ei->coord[0] < min[0]) - min[0] = ei->coord[0]; - if (ei->coord[0] > max[0]) - max[0] = ei->coord[0]; - - if (ei->coord[1] < min[1]) - min[1] = ei->coord[1]; - if (ei->coord[1] > max[1]) - max[1] = ei->coord[1]; + if (ei->coord[0] < min[0]) min[0] = ei->coord[0]; + if (ei->coord[0] > max[0]) max[0] = ei->coord[0]; + + if (ei->coord[1] < min[1]) min[1] = ei->coord[1]; + if (ei->coord[1] > max[1]) max[1] = ei->coord[1]; } if (ndim == 3) { for (uint i = 0; i < nel; i++) { ei = (struct rcb_element *)(elems + i * unit_size); - if (ei->coord[2] < min[2]) - min[2] = ei->coord[2]; - if (ei->coord[2] > max[2]) - max[2] = ei->coord[2]; + if (ei->coord[2] < min[2]) min[2] = ei->coord[2]; + if (ei->coord[2] > max[2]) max[2] = ei->coord[2]; } } @@ -39,54 +33,36 @@ static void get_axis_len(double *length, size_t unit_size, char *elems, comm_allreduce(c, gs_double, gs_max, max, 3, wrk); } - for (uint i = 0; i < ndim; i++) - length[i] = max[i] - min[i]; + for (uint i = 0; i < ndim; i++) length[i] = max[i] - min[i]; } void rcb_local(struct array *a, size_t unit_size, uint start, uint end, int ndim, buffer *buf) { sint size = end - start; - if (size <= 1) - return; + if (size <= 1) return; double length[3]; char *st = (char *)a->ptr + unit_size * start; get_axis_len(length, unit_size, st, size, ndim, NULL); int axis = 0; - if (length[1] > length[0]) - axis = 1; + if (length[1] > length[0]) axis = 1; if (ndim == 3) - if (length[2] > length[axis]) - axis = 2; + if (length[2] > length[axis]) axis = 2; if (unit_size == sizeof(struct rcb_element)) { switch (axis) { - case 0: - sarray_sort(struct rcb_element, st, size, coord[0], 3, buf); - break; - case 1: - sarray_sort(struct rcb_element, st, size, coord[1], 3, buf); - break; - case 2: - sarray_sort(struct rcb_element, st, size, coord[2], 3, buf); - break; - default: - break; + case 0: sarray_sort(struct rcb_element, st, size, coord[0], 3, buf); break; + case 1: sarray_sort(struct rcb_element, st, size, coord[1], 3, buf); break; + case 2: sarray_sort(struct rcb_element, st, size, coord[2], 3, buf); break; + default: break; } } else if (unit_size == sizeof(struct rsb_element)) { switch (axis) { - case 0: - sarray_sort(struct rsb_element, st, size, coord[0], 3, buf); - break; - case 1: - sarray_sort(struct rsb_element, st, size, coord[1], 3, buf); - break; - case 2: - sarray_sort(struct rsb_element, st, size, coord[2], 3, buf); - break; - default: - break; + case 0: sarray_sort(struct rsb_element, st, size, coord[0], 3, buf); break; + case 1: sarray_sort(struct rsb_element, st, size, coord[1], 3, buf); break; + case 2: sarray_sort(struct rsb_element, st, size, coord[2], 3, buf); break; + default: break; } } @@ -97,16 +73,14 @@ void rcb_local(struct array *a, size_t unit_size, uint start, uint end, static int rcb_level(struct array *a, size_t unit_size, int ndim, struct comm *c, buffer *bfr) { - if (c->np == 1) - return 0; + if (c->np == 1) return 0; double length[3]; get_axis_len(length, unit_size, (char *)a->ptr, a->n, ndim, c); int axis = 0, d; for (d = 1; d < ndim; d++) - if (length[d] > length[axis]) - axis = d; + if (length[d] > length[axis]) axis = d; if (unit_size == sizeof(struct rcb_element)) { switch (axis) { @@ -119,8 +93,7 @@ static int rcb_level(struct array *a, size_t unit_size, int ndim, case 2: parallel_sort(struct rcb_element, a, coord[2], gs_double, 0, 1, c, bfr); break; - default: - break; + default: break; } } else if (unit_size == sizeof(struct rsb_element)) { switch (axis) { @@ -133,8 +106,7 @@ static int rcb_level(struct array *a, size_t unit_size, int ndim, case 2: parallel_sort(struct rsb_element, a, coord[2], gs_double, 0, 1, c, bfr); break; - default: - break; + default: break; } } diff --git a/src/rib.c b/src/rib.c index b64d8771..385d6bb3 100644 --- a/src/rib.c +++ b/src/rib.c @@ -28,8 +28,7 @@ static void get_rib_axis(char *elems, uint nel, size_t unit_size, int ndim, avg[2] /= nelg; double I[3][3]; - for (i = 0; i < 3; i++) - I[i][0] = I[i][1] = I[i][2] = 0.0; + for (i = 0; i < 3; i++) I[i][0] = I[i][1] = I[i][2] = 0.0; double x, y, z; for (i = 0; i < nel; i++) { @@ -42,8 +41,7 @@ static void get_rib_axis(char *elems, uint nel, size_t unit_size, int ndim, I[2][0] += z * x, I[2][1] += z * y, I[2][2] += z * z; } - if (c != NULL) - comm_allreduce(c, gs_double, gs_add, I, 9, buf); + if (c != NULL) comm_allreduce(c, gs_double, gs_add, I, 9, buf); double ev[3]; // ev[2] = 0 if 2D power_serial(ev, ndim, (double *)I, 0); // FIXME: 2D does not work @@ -60,8 +58,7 @@ static void get_rib_axis(char *elems, uint nel, size_t unit_size, int ndim, void rib_local(struct array *a, size_t unit_size, uint start, uint end, int ndim, buffer *buf) { sint size = end - start; - if (size <= 1) - return; + if (size <= 1) return; char *st = (char *)a->ptr + unit_size * start; get_rib_axis(st, size, unit_size, ndim, NULL); @@ -78,8 +75,7 @@ void rib_local(struct array *a, size_t unit_size, uint start, uint end, static int rib_level(struct array *a, size_t unit_size, int ndim, struct comm *c, buffer *bfr) { - if (c->np == 1) - return 0; + if (c->np == 1) return 0; get_rib_axis((char *)a->ptr, a->n, unit_size, ndim, c); diff --git a/src/rsb.c b/src/rsb.c index 066b00fe..e148e80d 100644 --- a/src/rsb.c +++ b/src/rsb.c @@ -52,11 +52,9 @@ static void check_rsb_partition(const struct comm *gc, sint converged = 1; int val = (int)metric_get_value(i, RSB_FIEDLER_CALC_NITER); if (opts->rsb_algo == 0) { - if (val == miter * mpass) - converged = 0; + if (val == miter * mpass) converged = 0; } else if (opts->rsb_algo == 1) { - if (val == mpass) - converged = 0; + if (val == mpass) converged = 0; } struct comm c; @@ -104,25 +102,18 @@ static void check_rsb_partition(const struct comm *gc, } } -static int check_bin_val(int bin, struct comm *c) { - if (bin < 0 || bin > 1) { - if (c->id == 0) { - printf("%s:%d bin value out of range: %d\n", __FILE__, __LINE__, bin); - fflush(stdout); - } - return 1; - } +static inline int check_bin_val(int bin) { + if (bin < 0 || bin > 1) return 1; return 0; } static int balance_partitions(struct array *elements, unsigned nv, struct comm *lc, struct comm *gc, int bin, buffer *bfr) { - // Return if there is only one processor. - if (gc->np == 1) - return 0; + // Return if there is only one processor (or partition). + if (gc->np == 1 || gc->np == lc->np) return 0; - assert(check_bin_val(bin, gc) == 0); + assert(check_bin_val(bin) == 0 && "Invalid bin value !"); struct ielem_t { uint index, orig; @@ -131,54 +122,49 @@ static int balance_partitions(struct array *elements, unsigned nv, }; // Calculate expected # of elements per processor. - uint ne = elements->n; + size_t ne = elements->n; slong nelgt = ne, nglob = ne, wrk; comm_allreduce(lc, gs_long, gs_add, &nelgt, 1, &wrk); comm_allreduce(gc, gs_long, gs_add, &nglob, 1, &wrk); sint ne_ = nglob / gc->np, nrem = nglob - ne_ * gc->np; slong nelgt_exp = ne_ * lc->np + nrem / 2 + (nrem % 2) * (1 - bin); - slong send_cnt = nelgt - nelgt_exp > 0 ? nelgt - nelgt_exp : 0; + slong send_cnt = (nelgt - nelgt_exp) > 0 ? (nelgt - nelgt_exp) : 0; // Setup gather-scatter. size_t size = ne * nv; - uint e, v; slong *ids = tcalloc(slong, size); struct rsb_element *elems = (struct rsb_element *)elements->ptr; - for (e = 0; e < ne; e++) { - for (v = 0; v < nv; v++) - ids[e * nv + v] = elems[e].vertices[v]; + for (uint e = 0; e < ne; e++) { + for (uint v = 0; v < nv; v++) ids[e * nv + v] = elems[e].vertices[v]; } struct gs_data *gsh = gs_setup(ids, size, gc, 0, gs_pairwise, 0); sint *input = (sint *)ids; if (send_cnt > 0) { - for (e = 0; e < size; e++) - input[e] = 0; + for (uint e = 0; e < size; e++) input[e] = 0; } else { - for (e = 0; e < size; e++) - input[e] = 1; + for (uint e = 0; e < size; e++) input[e] = 1; } gs(input, gs_int, gs_add, 0, gsh, bfr); - for (e = 0; e < ne; e++) - elems[e].proc = gc->id; + for (uint e = 0; e < ne; e++) elems[e].proc = gc->id; - sint sid = (send_cnt == 0) ? gc->id : INT_MAX, balanced = 0; + sint sid = (send_cnt == 0) ? gc->id : INT_MAX; comm_allreduce(gc, gs_int, gs_min, &sid, 1, &wrk); struct crystal cr; + sint balanced = 0; if (send_cnt > 0) { struct array ielems; array_init(struct ielem_t, &ielems, 10); - struct ielem_t ielem = { - .index = 0, .orig = lc->id, .dest = -1, .fiedler = 0}; + struct ielem_t ielem = {.orig = lc->id, .dest = -1}; int mul = (sid == 0) ? 1 : -1; - for (e = 0; e < ne; e++) { - for (v = 0; v < nv; v++) { + for (uint e = 0; e < ne; e++) { + for (uint v = 0; v < nv; v++) { if (input[e * nv + v] > 0) { ielem.index = e, ielem.fiedler = mul * elems[e].fiedler; array_cat(struct ielem_t, &ielems, &ielem, 1); @@ -200,7 +186,7 @@ static int balance_partitions(struct array *elements, unsigned nv, if (out[1][0] >= send_cnt) { balanced = 1; struct ielem_t *ptr = ielems.ptr; - for (e = 0; start + e < send_cnt && e < ielems.n; e++) + for (uint e = 0; start + e < send_cnt && e < ielems.n; e++) ptr[e].dest = sid + (start + e) / part_size; crystal_init(&cr, lc); @@ -208,9 +194,8 @@ static int balance_partitions(struct array *elements, unsigned nv, crystal_free(&cr); ptr = ielems.ptr; - for (e = 0; e < ielems.n; e++) - if (ptr[e].dest != -1) - elems[ptr[e].index].proc = ptr[e].dest; + for (uint e = 0; e < ielems.n; e++) + if (ptr[e].dest != -1) elems[ptr[e].index].proc = ptr[e].dest; } array_free(&ielems); @@ -235,83 +220,34 @@ static int balance_partitions(struct array *elements, unsigned nv, return 0; } -static int repair_partitions_v2(struct array *elems, unsigned nv, - struct comm *tc, struct comm *lc, unsigned bin, - unsigned algo, buffer *bfr) { - assert(check_bin_val(bin, lc) == 0); - - sint nc = get_components_v2(NULL, elems, nv, tc, bfr, 0), wrk; - comm_allreduce(lc, gs_int, gs_max, &nc, 1, &wrk); - if (nc > 1) { - // If nc > 1, send elements back and do RCBx, RCBy and RCBz - struct crystal cr; - crystal_init(&cr, lc); - sarray_transfer(struct rsb_element, elems, proc, 0, &cr); - crystal_free(&cr); - - // Do rcb or rib - unsigned ndim = (nv == 8) ? 3 : 2; - switch (algo) { - case 0: - parallel_sort(struct rsb_element, elems, globalId, gs_long, 0, 1, lc, - bfr); - break; - case 1: - rcb(elems, sizeof(struct rsb_element), ndim, lc, bfr); - break; - case 2: - rib(elems, sizeof(struct rsb_element), ndim, lc, bfr); - break; - default: - break; - } - - // And count number of components again. If nc > 1 still, set - // isconnected = 1 - nc = get_components_v2(NULL, elems, nv, tc, bfr, 0); - comm_allreduce(lc, gs_int, gs_max, &nc, 1, &wrk); - } - - return 0; -} - -static sint get_bisect_comm(struct comm *const tc, const struct comm *const lc, - const uint level, const uint levels, - const struct comm comms[3]) { - sint pid, psize; +static sint get_bin(const struct comm *const lc, const uint level, + const uint levels, const struct comm comms[3]) { + sint psize = lc->np, pid = lc->id; if (level < levels - 1) { sint out[2][1], wrk[2][1], in = (comms[level + 1].id == 0); - comm_scan(out, &comms[level], gs_int, gs_add, &in, 1, wrk); + comm_scan(out, lc, gs_int, gs_add, &in, 1, wrk); psize = out[1][0], pid = (comms[level + 1].id == 0) * out[0][0]; comm_allreduce(&comms[level + 1], gs_int, gs_max, &pid, 1, wrk); - } else { - psize = lc->np, pid = lc->id; } - const sint bin = (pid >= (psize + 1) / 2); - comm_split(lc, bin, lc->id, tc); - return bin; + return (pid >= (psize + 1) / 2); } static uint get_level_cuts(const uint level, const uint levels, const struct comm comms[3]) { - uint n; + uint n = comms[level].np; if (level < levels - 1) { sint size = (comms[level + 1].id == 0), wrk; comm_allreduce(&comms[level], gs_int, gs_add, &size, 1, &wrk); n = size; - } else { - n = comms[level].np; } sint cuts = 0; uint pow2 = 1; - while (pow2 < n) - pow2 <<= 1, cuts++; + while (pow2 < n) pow2 <<= 1, cuts++; sint wrk; comm_allreduce(&comms[0], gs_int, gs_max, &cuts, 1, &wrk); - return cuts; } @@ -331,7 +267,9 @@ void rsb(struct array *elements, int nv, const parrsb_options *const options, comm_dup(&lc, &comms[level]); for (uint cut = 0; cut < ncuts; cut++) { // Run the pre-partitioner. - parrsb_print(gc, verbose - 1, "\trsb: Pre-partition ..."); + parrsb_print(gc, verbose - 1, + "\trsb: level = %d, cut = %d, Pre-partition ...", level + 1, + cut + 1); metric_tic(&lc, RSB_PRE); switch (options->rsb_pre) { @@ -339,40 +277,41 @@ void rsb(struct array *elements, int nv, const parrsb_options *const options, parallel_sort(struct rsb_element, elements, globalId, gs_long, 0, 1, &lc, bfr); break; - case 1: - rcb(elements, sizeof(struct rsb_element), ndim, &lc, bfr); - break; - case 2: - rib(elements, sizeof(struct rsb_element), ndim, &lc, bfr); - break; - default: - break; + case 1: rcb(elements, sizeof(struct rsb_element), ndim, &lc, bfr); break; + case 2: rib(elements, sizeof(struct rsb_element), ndim, &lc, bfr); break; + default: break; } metric_toc(&lc, RSB_PRE); struct rsb_element *const pe = (struct rsb_element *const)elements->ptr; - for (unsigned i = 0; i < elements->n; i++) - pe[i].proc = lc.id; + for (unsigned i = 0; i < elements->n; i++) pe[i].proc = lc.id; // Find the Fiedler vector. - parrsb_print(gc, verbose - 1, "\trsb: Fiedler ... "); + parrsb_print(gc, verbose - 1, "\trsb: level = %d, cut = %d, Fiedler ... ", + level + 1, cut + 1); metric_tic(&lc, RSB_FIEDLER); fiedler(elements, nv, options, &lc, bfr, verbose - 2); metric_toc(&lc, RSB_FIEDLER); - // Sort by Fiedler vector. - parrsb_print(gc, verbose - 1, "\trsb: Sort ..."); + // Sort by Fiedler value. + parrsb_print(gc, verbose - 1, "\trsb: level = %d, cut = %d, Sort ...", + level + 1, cut + 1); metric_tic(&lc, RSB_SORT); parallel_sort(struct rsb_element, elements, fiedler, gs_double, 0, 1, &lc, bfr); metric_toc(&lc, RSB_SORT); - // `tc` is the new communicator in newly found partitions. + // Get the bin of the current process. + sint bin = get_bin(&lc, level, levels, comms); + + // Create the new communicator `tc`. struct comm tc; - sint bin = get_bisect_comm(&tc, &lc, level, levels, comms); + comm_split(&lc, bin, lc.id, &tc); // Find the number of disconnected components. - parrsb_print(gc, verbose - 1, "\trsb: Components ..."); + parrsb_print(gc, verbose - 1, + "\trsb: level = %d, cut = %d, Components ...", level + 1, + cut + 1); metric_tic(&lc, RSB_COMPONENTS); const uint ncomp = get_components_v2(NULL, elements, nv, &tc, bfr, verbose - 2); @@ -380,13 +319,15 @@ void rsb(struct array *elements, int nv, const parrsb_options *const options, metric_toc(&lc, RSB_COMPONENTS); // Bisect and balance. - parrsb_print(gc, verbose - 1, "\trsb: Balance ..."); + parrsb_print(gc, verbose - 1, "\trsb: level = %d, cut = %d, Balance ...", + level + 1, cut + 1); metric_tic(&lc, RSB_BALANCE); balance_partitions(elements, nv, &tc, &lc, bin, bfr); metric_toc(&lc, RSB_BALANCE); // Split the communicator and recurse on the sub-problems. - parrsb_print(gc, verbose - 1, "\trsb: Bisect ..."); + parrsb_print(gc, verbose - 1, "\trsb: level = %d, cut = %d, Bisect ...", + level + 1, cut + 1); comm_free(&lc), comm_dup(&lc, &tc), comm_free(&tc); const uint nbrs = parrsb_get_neighbors(elements, nv, gc, &lc, bfr); diff --git a/src/sort-bin.c b/src/sort-bin.c index cb6c6d9e..cdc3b5b8 100644 --- a/src/sort-bin.c +++ b/src/sort-bin.c @@ -11,8 +11,7 @@ static uint *set_proc_from_val(struct sort *s, uint field, double range = extrema[1] - extrema[0]; uint size = a->n; - if (size == 0) - return NULL; + if (size == 0) return NULL; uint *proc = tcalloc(uint, size); uint np = c->np; @@ -29,8 +28,7 @@ static uint *set_proc_from_val(struct sort *s, uint field, } id++; } while (id < np && index < size); - for (; index < size; index++) - proc[index] = np - 1; + for (; index < size; index++) proc[index] = np - 1; return proc; } diff --git a/src/sort-hypercube.c b/src/sort-hypercube.c index d8f01a52..3e8e8a06 100644 --- a/src/sort-hypercube.c +++ b/src/sort-hypercube.c @@ -11,10 +11,8 @@ struct hypercube { static void init_probes(struct hypercube *data, const struct comm *c) { // Allocate space for probes and counts. int nprobes = data->nprobes = 3; - if (!data->probes) - data->probes = tcalloc(double, nprobes); - if (!data->probe_cnt) - data->probe_cnt = tcalloc(ulong, nprobes); + if (!data->probes) data->probes = tcalloc(double, nprobes); + if (!data->probe_cnt) data->probe_cnt = tcalloc(ulong, nprobes); double extrema[2]; get_extrema((void *)extrema, data->data, 0, c); @@ -32,15 +30,13 @@ static void update_probe_counts(struct hypercube *data, const struct comm *c) { gs_dom t = input->t[0]; uint nprobes = data->nprobes; - for (uint i = 0; i < nprobes; i++) - data->probe_cnt[i] = 0; + for (uint i = 0; i < nprobes; i++) data->probe_cnt[i] = 0; struct array *a = input->a; for (uint e = 0; e < a->n; e++) { double val = get_scalar(a, e, offset, input->unit_size, t); for (uint i = 0; i < nprobes; i++) { - if (val < data->probes[i]) - data->probe_cnt[i]++; + if (val < data->probes[i]) data->probe_cnt[i]++; } } @@ -52,8 +48,7 @@ static void update_probes(slong nelem, double *probes, ulong *probe_cnt, uint threshold) { assert(nelem >= 0); slong expected = nelem / 2; - if (llabs(expected - (slong)probe_cnt[1]) < threshold) - return; + if (llabs(expected - (slong)probe_cnt[1]) < threshold) return; if (probe_cnt[1] < (ulong)expected) probes[0] = probes[1]; @@ -88,8 +83,7 @@ static void transfer_elem(const struct hypercube *data, const struct comm *c) { uint *proc1 = set_proc_from_idx(lnp, lstart, lown, lelem); uint *proc2 = set_proc_from_idx(np - lnp, ustart, uppern, uelem); proc1 = trealloc(uint, proc1, size); - for (uint e = lown; e < size; e++) - proc1[e] = proc2[e - lown] + lnp; + for (uint e = lown; e < size; e++) proc1[e] = proc2[e - lown] + lnp; sarray_transfer_chunk(a, usize, proc1, c); free(proc1), free(proc2); @@ -107,13 +101,11 @@ static void parallel_hypercube_sort_aux(struct hypercube *data, slong nelem = out[1][0]; uint threshold = nelem / (10 * c->np); - if (threshold < 2) - threshold = 2; + if (threshold < 2) threshold = 2; sort_local(data->data); - if (c->np == 1) - return; + if (c->np == 1) return; init_probes(data, c); update_probe_counts(data, c); diff --git a/src/sort.c b/src/sort.c index 93b0adb5..e4fb3594 100644 --- a/src/sort.c +++ b/src/sort.c @@ -11,15 +11,9 @@ double get_scalar(struct array *a, uint i, uint offset, uint usize, double data; switch (type) { - case gs_int: - data = *((uint *)v); - break; - case gs_long: - data = *((ulong *)v); - break; - case gs_double: - data = *((double *)v); - break; + case gs_int: data = *((uint *)v); break; + case gs_long: data = *((ulong *)v); break; + case gs_double: data = *((double *)v); break; default: fprintf(stderr, "Error: Unknown type %d\n", type); exit(EXIT_FAILURE); @@ -52,16 +46,14 @@ void get_extrema(void *extrema_, struct sort *data, uint field, } uint *set_proc_from_idx(uint size, sint np_, slong start, slong nelem) { - if (nelem == 0) - return NULL; + if (nelem == 0) return NULL; uint *proc = tcalloc(uint, size + 1); ulong np = np_; ulong nelt = nelem / np, nrem = nelem - np * nelt; assert(nrem < np); if (nrem == 0) { - for (uint i = 0; i < size; i++) - proc[i] = (uint)((start + i) / nelt); + for (uint i = 0; i < size; i++) proc[i] = (uint)((start + i) / nelt); } else { ulong s = np - nrem; ulong t1 = nelt * s; @@ -94,8 +86,7 @@ static int sort_field(struct array *arr, size_t usize, gs_dom t, uint off, case gs_int: // FIXME gs_uint gslib_sortp_ui(buf, keep, (uint *)((char *)ptr + off), nunits, usize); break; - default: - break; + default: break; } return 0; @@ -108,8 +99,7 @@ void sort_local(struct sort *s) { int i = s->nfields - 1; sort_field(a, usize, s->t[i], s->offset[i], buf, 0), i--; - while (i >= 0) - sort_field(a, usize, s->t[i], s->offset[i], buf, 1), i--; + while (i >= 0) sort_field(a, usize, s->t[i], s->offset[i], buf, 1), i--; sarray_permute_buf_(s->align, usize, a->ptr, a->n, buf); } @@ -131,8 +121,7 @@ void sarray_transfer_chunk(struct array *arr, const size_t usize, // Calculate the global array size. If it is zero, nothing to do, just return. slong ng = arr->n, wrk[2]; comm_allreduce(c, gs_long, gs_add, &ng, 1, wrk); - if (ng == 0) - return; + if (ng == 0) return; // Initialize the crystal router. struct crystal cr; @@ -140,8 +129,7 @@ void sarray_transfer_chunk(struct array *arr, const size_t usize, // Allocate `proc` with some buffer space. uint *proc = tcalloc(uint, arr->n + 1); - for (uint i = 0; i < arr->n; i++) - proc[i] = proci[i]; + for (uint i = 0; i < arr->n; i++) proc[i] = proci[i]; // Transfer the array elements to destination processor. To avoid message // sizes larger than INT_MAX, we calculate total message size and then figure @@ -213,14 +201,9 @@ void parallel_sort_(struct array *arr, size_t usize, size_t align, } switch (algo) { - case 0: - parallel_bin_sort(&sd, c); - break; - case 1: - parallel_hypercube_sort(&sd, c); - break; - default: - break; + case 0: parallel_bin_sort(&sd, c); break; + case 1: parallel_hypercube_sort(&sd, c); break; + default: break; } if (balance) { diff --git a/src/statistics.c b/src/statistics.c index f8f8697a..031752e7 100644 --- a/src/statistics.c +++ b/src/statistics.c @@ -51,8 +51,7 @@ uint parrsb_get_neighbors(const struct array *const elems, const unsigned nv, uint s = 0; while (s < vertices.n) { uint e = s + 1; - while (e < vertices.n && pv[s].v == pv[e].v) - e++; + while (e < vertices.n && pv[s].v == pv[e].v) e++; for (uint i = s; i < e; i++) { struct vertex_t vt = pv[i]; for (uint j = s; j < e; j++) { @@ -101,8 +100,7 @@ uint parrsb_get_neighbors(const struct array *const elems, const unsigned nv, un = 1; struct unique_t *pu = (struct unique_t *)unique.ptr; for (uint i = 1; i < unique.n; i++) { - if (pu[i].partition > pu[un - 1].partition) - pu[un] = pu[i], un++; + if (pu[i].partition > pu[un - 1].partition) pu[un] = pu[i], un++; } } array_free(&unique); @@ -127,8 +125,7 @@ struct pgeom_t { }; void parrsb_dump_stats_start(const uint nv_) { - if (pgeom_initialized) - return; + if (pgeom_initialized) return; nv = nv_; level = 0; @@ -140,8 +137,7 @@ void parrsb_dump_stats_start(const uint nv_) { void parrsb_dump_stats(const struct comm *const gc, const struct comm *const lc, const struct array *const elems, buffer *bfr) { - if (!pgeom_initialized) - return; + if (!pgeom_initialized) return; const struct rsb_element *const pe = (const struct rsb_element *const)elems->ptr; @@ -160,15 +156,13 @@ void parrsb_dump_stats(const struct comm *const gc, const struct comm *const lc, min[d] = (min[d] > c) ? c : min[d]; } } - for (uint d = 0; d < ndim; d++) - centroid[d] /= n; + for (uint d = 0; d < ndim; d++) centroid[d] /= n; double wrk[3]; comm_allreduce(lc, gs_double, gs_min, min, ndim, wrk); comm_allreduce(lc, gs_double, gs_max, max, ndim, wrk); comm_allreduce(lc, gs_double, gs_add, centroid, ndim, wrk); - for (uint d = 0; d < ndim; d++) - centroid[d] /= lc->np; + for (uint d = 0; d < ndim; d++) centroid[d] /= lc->np; // Partition root accumulates the partition geometry. level++; @@ -178,13 +172,11 @@ void parrsb_dump_stats(const struct comm *const gc, const struct comm *const lc, .max = {max[0], max[1], max[2]}, .min = {min[0], min[1], min[2]}, .p = 0}; - if (lc->id == 0) - array_cat(struct pgeom_t, &pgeom, &pg, 1); + if (lc->id == 0) array_cat(struct pgeom_t, &pgeom, &pg, 1); } void parrsb_dump_stats_end(const struct comm *const gc, const char *prefix) { - if (!pgeom_initialized) - return; + if (!pgeom_initialized) return; const uint size = strnlen(prefix, 64); assert(size < 64 && "Prefix must be less than 64 characters.");