From 51ab1903e7f1698b63ca0239031ae57e7b2cc5ab Mon Sep 17 00:00:00 2001 From: yamazaki-mitsufumi Date: Thu, 18 Apr 2024 18:20:25 +0900 Subject: [PATCH] Expanding the scope of 2D thread distribution --- driver/level3/level3_thread.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index c7ccf84260..9fec7afca2 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -826,6 +826,16 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF if (nthreads_m * nthreads_n > args -> nthreads) { nthreads_n = blas_quickdivide(args -> nthreads, nthreads_m); } + /* The nthreads_m and nthreads_n are adjusted so that the submatrix */ + /* to be handled by each thread preferably becomes a square matrix */ + /* by minimizing an objective function 'n * nthreads_m + m * nthreads_n'. */ + /* The objective function comes from the sum of the partitions in m and n. */ + /* (n / nthreads_n) + (m / nthreads_m) */ + /* = (n * nthreads_m + m * nthreads_n) / (nthreads_n * nthreads_m) */ + while (nthreads_m % 2 == 0 && n * nthreads_m + m * nthreads_n > n * (nthreads_m / 2) + m * (nthreads_n * 2)) { + nthreads_m /= 2; + nthreads_n *= 2; + } } /* Execute serial or parallel computation */