Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also . Learn more about diff comparisons here.
base repository: etmc/DDalphaAMG
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: master
Choose a base ref
...
head repository: sbacchio/DDalphaAMG
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: master
Choose a head ref
Able to merge. These branches can be automatically merged.
  • 9 commits
  • 5 files changed
  • 3 contributors

Commits on Jan 7, 2019

  1. Copy the full SHA
    528b441 View commit details
  2. Adding benchmark code

    sbacchio committed Jan 7, 2019
    Copy the full SHA
    c36d17c View commit details
  3. Fixing options

    sbacchio committed Jan 7, 2019
    Copy the full SHA
    8da85be View commit details

Commits on Jan 8, 2019

  1. Fixing help

    sbacchio committed Jan 8, 2019
    Copy the full SHA
    e535889 View commit details
  2. Copy the full SHA
    f8fed8e View commit details

Commits on Nov 25, 2020

  1. Copy the full SHA
    ee388b6 View commit details

Commits on Dec 17, 2020

  1. Merge pull request sbacchio#6 from sbacchio/benchmark

    Benchmark
    sbacchio authored Dec 17, 2020

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    1f6d5c9 View commit details

Commits on Feb 4, 2023

  1. Verified

    This commit was signed with the committer’s verified signature.
    kostrzewa Bartosz Kostrzewa
    Copy the full SHA
    ed165e0 View commit details
  2. Merge pull request sbacchio#7 from kostrzewa/fix_main_build

    fix building of test program at link stage
    sbacchio authored Feb 4, 2023

    Verified

    This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
    Copy the full SHA
    9c723a8 View commit details
Showing with 225 additions and 194 deletions.
  1. +16 −7 src/DDalphaAMG.h
  2. +102 −36 src/DDalphaAMG_interface.c
  3. +3 −3 src/main.c
  4. +4 −8 src/setup_generic.c
  5. +100 −140 tests/{DDalphaAMG_proj_test.c → DDalphaAMG_benchmark.c}
23 changes: 16 additions & 7 deletions src/DDalphaAMG.h
Original file line number Diff line number Diff line change
@@ -243,26 +243,35 @@
** vector_out = P^\dagger * vector_in.
** mg_status.success = 1
**/
void DDalphaAMG_restrict( double *vector_out, double *vector_in, int level,
void DDalphaAMG_restrict( float *vector_out, float *vector_in, int level,
DDalphaAMG_status *mg_status );

/**
** Optional - Prolongate vector to level:
** vector_out = P * vector_in.
** mg_status.success = 1
**/
void DDalphaAMG_prolongate( double *vector_out, double *vector_in, int level,
DDalphaAMG_status *mg_status );
void DDalphaAMG_prolongate( float *vector_out, float *vector_in, int level,
DDalphaAMG_status *mg_status );

/**
** Optional - Apply the operator:
** vector_out = D * vector_in.
** mg_status.success = 1
**/
void DDalphaAMG_apply_coarse_operator( double *vector_out, double *vector_in, int level,
DDalphaAMG_status *mg_status );
void DDalphaAMG_apply_coarse_operator( float *vector_out, float *vector_in, int level,
DDalphaAMG_status *mg_status );

/*
/**
** Extra - coarse vector utils
**/
float* DDalphaAMG_coarse_vector_alloc( int level );
void DDalphaAMG_coarse_vector_free( float *vector, int level );
void DDalphaAMG_coarse_vector_rand( float *vector, int level );
float DDalphaAMG_coarse_vector_residual( float *vector_out, float *vector_in, int level );


/*
* Concluding the following functions have to be call for freeing the memory and finalizing
* the software.
*/
@@ -376,7 +385,7 @@
** from 1 (no MG) to 4 (max number of levels)
**/
int number_of_levels;

/**
** Number of openmp threads:
** from 1 to omp_get_num_threads()
138 changes: 102 additions & 36 deletions src/DDalphaAMG_interface.c
Original file line number Diff line number Diff line change
@@ -1515,7 +1515,7 @@ static inline void DDalphaAMG_ms_driver( double **vector1_out, double *vector1_i

}

static inline void DDalphaAMG_proj_driver( double *vector_out, double *vector_in, int level, DDalphaAMG_status *mg_status, int _TYPE ) {
static inline void DDalphaAMG_proj_driver( float *vector_out, float *vector_in, int level, DDalphaAMG_status *mg_status, int _TYPE ) {

int t, z, y, x, i, j, mu, *ll;

@@ -1529,12 +1529,12 @@ static inline void DDalphaAMG_proj_driver( double *vector_out, double *vector_in
} else if(_TYPE==_PROLONGATE) {
from=ltmp->next_level;
to=ltmp;
} else {
from=ltmp->next_level;
} else { // includes _OPERATOR
from=ltmp;
to=ltmp;
}
vector_float rhs = from->p_float.b;
vector_float sol = to->p_float.x;
vector_float in = from->p_float.b;
vector_float out = to->p_float.x;

double t0, t1;
t0 = MPI_Wtime();
@@ -1548,58 +1548,62 @@ static inline void DDalphaAMG_proj_driver( double *vector_out, double *vector_in
ASSERT(vector_out!=NULL);
ASSERT(vector_in!=NULL);

ll = from->local_lattice;
for (t=0, j=0; t<ll[T]; t++)
for (z=0; z<ll[Z]; z++)
for (y=0; y<ll[Y]; y++)
for (x=0; x<ll[X]; x++) {
if(vector_index_fct!=NULL )
if(vector_index_fct!=NULL ) {
ll = from->local_lattice;
for (t=0, j=0; t<ll[T]; t++)
for (z=0; z<ll[Z]; z++)
for (y=0; y<ll[Y]; y++)
for (x=0; x<ll[X]; x++) {
i = vector_index_fct( t, z, y, x );
else
i = 2*j;

for ( mu=0; mu<from->num_lattice_site_var; mu++, j++ )
rhs[j] = ((complex_float)vector_in[i+2*mu] + I*(complex_float)vector_in[i+2*mu+1]);
}
in[j] = (vector_in[i+2*mu] + I*vector_in[i+2*mu+1]);
}
} else {
in = (complex_float*) vector_in;
out = (complex_float*) vector_out;
}

switch(_TYPE) {

case _RESTRICT :
THREADED(threading[0]->n_core)
restrict_float( sol, rhs, from, threading[omp_get_thread_num()] );
restrict_float( out, in, from, threading[omp_get_thread_num()] );
break;

case _PROLONGATE :
THREADED(threading[0]->n_core)
interpolate3_float( sol, rhs, to, threading[omp_get_thread_num()] );
interpolate3_float( out, in, to, threading[omp_get_thread_num()] );
break;

case _OPERATOR :
THREADED(threading[0]->n_core)
apply_operator_float( sol, rhs, &(from->p_float), from, threading[omp_get_thread_num()] );
THREADED(threading[0]->n_core)
if ( from->level==0 && g.odd_even )
coarse_odd_even_float_test( out, in, from, threading[omp_get_thread_num()] );
else
apply_operator_float( out, in, &(from->p_float), from, threading[omp_get_thread_num()] );

break;

default :
warning0("_TYPE not found in DDalphaAMG_driver. Returing vector in as vector out.");
sol=rhs;
break;
}

ll = to->local_lattice;
for (t=0, j=0; t<ll[T]; t++)
for (z=0; z<ll[Z]; z++)
for (y=0; y<ll[Y]; y++)
for (x=0; x<ll[X]; x++) {
if(vector_index_fct!=NULL )
if(vector_index_fct!=NULL ) {
ll = to->local_lattice;
for (t=0, j=0; t<ll[T]; t++)
for (z=0; z<ll[Z]; z++)
for (y=0; y<ll[Y]; y++)
for (x=0; x<ll[X]; x++) {
i = vector_index_fct( t, z, y, x );
else
i = 2*j;

for ( mu=0; mu<to->num_lattice_site_var; mu++, j++ ) {
vector_out[i+2*mu] = (double) creal(sol[j]);
vector_out[i+2*mu+1] = (double) cimag(sol[j]);

for ( mu=0; mu<to->num_lattice_site_var; mu++, j++ ) {
vector_out[i+2*mu] = creal(out[j]);
vector_out[i+2*mu+1] = cimag(out[j]);
}
}
}
}


mg_status->success = 1;
@@ -1761,18 +1765,80 @@ void DDalphaAMG_preconditioner_doublet( double *vector1_out, double *vector1_in,
set_n_flavours( 1 );
}

void DDalphaAMG_restrict( double *vector_out, double *vector_in, int level, DDalphaAMG_status *mg_status ) {
void DDalphaAMG_restrict( float *vector_out, float *vector_in, int level, DDalphaAMG_status *mg_status ) {
DDalphaAMG_proj_driver( vector_out, vector_in, level, mg_status, _RESTRICT );
}

void DDalphaAMG_prolongate( double *vector_out, double *vector_in, int level, DDalphaAMG_status *mg_status ) {
void DDalphaAMG_prolongate( float *vector_out, float *vector_in, int level, DDalphaAMG_status *mg_status ) {
DDalphaAMG_proj_driver( vector_out, vector_in, level, mg_status, _PROLONGATE );
}

void DDalphaAMG_apply_coarse_operator( double *vector_out, double *vector_in, int level, DDalphaAMG_status *mg_status ) {
void DDalphaAMG_apply_coarse_operator( float *vector_out, float *vector_in, int level, DDalphaAMG_status *mg_status ) {
DDalphaAMG_proj_driver( vector_out, vector_in, level, mg_status, _OPERATOR );
}

float* DDalphaAMG_coarse_vector_alloc( int level ) {
level_struct *ltmp=&l;
for(int i=0; i<level; i++)
ltmp=ltmp->next_level;
vector_float vector = NULL;
MALLOC(vector, complex_float, ltmp->vector_size );
return (float*) vector;
}

void DDalphaAMG_coarse_vector_free( float *vector, int level ) {
level_struct *ltmp=&l;
for(int i=0; i<level; i++)
ltmp=ltmp->next_level;
FREE(vector, float, 2*ltmp->vector_size );
}

void DDalphaAMG_coarse_vector_rand( float *vector, int level ) {
level_struct *ltmp=&l;
for(int i=0; i<level; i++)
ltmp=ltmp->next_level;

THREADED(threading[0]->n_core)
if(vector!=NULL){
int start, end;
compute_core_start_end( 0, ltmp->inner_vector_size, &start, &end, ltmp, threading[omp_get_thread_num()]);
vector_float_define_random( (vector_float) vector, start, end, ltmp );
}
else {
warning0("Vector NULL when calling DDalphaAMG_define_vector_const!");
}
}

float DDalphaAMG_coarse_vector_residual( float *vector_out, float *vector_in, int level ) {
level_struct *ltmp=&l;
for(int i=0; i<level; i++)
ltmp=ltmp->next_level;

float norm1 = 1., norm2 = 1.;
THREADED(threading[0]->n_core)
if(vector_in!=NULL){
norm1 = global_norm_float( (vector_float) vector_in, 0, ltmp->inner_vector_size, ltmp, threading[omp_get_thread_num()] );
if(vector_out!=NULL){
int start, end;
compute_core_start_end( 0, ltmp->inner_vector_size, &start, &end, ltmp, threading[omp_get_thread_num()]);
vector_float_minus( (vector_float) vector_out, (vector_float) vector_out, (vector_float) vector_in, start, end, ltmp );
}
}
else {
warning0("Vector NULL when calling DDalphaAMG_define_vector_const!");
}
if(vector_out!=NULL){
THREADED(threading[0]->n_core)
norm2 = global_norm_float( (vector_float) vector_out, 0, ltmp->inner_vector_size, ltmp, threading[omp_get_thread_num()] );
}
else {
warning0("Vector NULL when calling DDalphaAMG_define_vector_const!");
return norm1;
}

return norm2/norm1;
}

void DDalphaAMG_free( void ) {
method_free( &l );
g.setup_flag = 0;
6 changes: 3 additions & 3 deletions src/main.c
Original file line number Diff line number Diff line change
@@ -21,12 +21,12 @@

#include "main.h"

global_struct g;
extern global_struct g;
#ifdef HAVE_HDF5
Hdf5_fileinfo h5info;
#endif
struct common_thread_data *commonthreaddata;
struct Thread *no_threading;
extern struct common_thread_data *commonthreaddata;
extern struct Thread *no_threading;

int main( int argc, char **argv ) {

12 changes: 4 additions & 8 deletions src/setup_generic.c
Original file line number Diff line number Diff line change
@@ -224,14 +224,10 @@ void interpolation_PRECISION_define( vector_double *V, level_struct *l, struct T
vector_PRECISION_define_random( l->is_PRECISION.test_vector[k], 0, l->inner_vector_size, l );
END_LOCKED_MASTER(threading)
// }

smoother_PRECISION( buffer[0], NULL, l->is_PRECISION.test_vector[k], 1, _NO_RES, l, threading );
vector_PRECISION_copy( l->is_PRECISION.test_vector[k], buffer[0], start, end, l );
smoother_PRECISION( buffer[0], NULL, l->is_PRECISION.test_vector[k], g.method>=4?1:2, _NO_RES, l, threading );
vector_PRECISION_copy( l->is_PRECISION.test_vector[k], buffer[0], start, end, l );
smoother_PRECISION( buffer[0], NULL, l->is_PRECISION.test_vector[k], g.method>=4?1:3, _NO_RES, l, threading );
vector_PRECISION_copy( l->is_PRECISION.test_vector[k], buffer[0], start, end, l );

for ( i = 0; i < g.post_smooth_iter[l->depth]; i++ ) {
smoother_PRECISION( buffer[0], NULL, l->is_PRECISION.test_vector[k], g.method>=4? 1:(i+1), _NO_RES, l, threading );
vector_PRECISION_copy( l->is_PRECISION.test_vector[k], buffer[0], start, end, l );
}
pc += 6;
#ifdef DEBUG
START_MASTER(threading)
240 changes: 100 additions & 140 deletions tests/DDalphaAMG_proj_test.c → tests/DDalphaAMG_benchmark.c
Original file line number Diff line number Diff line change
@@ -26,44 +26,46 @@
#include <getopt.h>
#include <mpi.h>
#include <omp.h>
#include <math.h>

//library to include
#include "DDalphaAMG.h"

enum { T, Z, Y, X };
#define printf0(...) do{if(rank==0)printf(__VA_ARGS__);}while(0)

int rank, max_setup=0, conf_format;
float residual=1e-9;
int rank;
int iters=100;
int coarsest_local_lattice[4];
MPI_Comm comm_cart;
DDalphaAMG_init init;
DDalphaAMG_parameters params;
DDalphaAMG_status status;
char * conf_file = "../conf/8x8x8x8b6.0000id3n1";
char * options = "c:f:i:L:p:B:l:k:w:u:t:hr:C:K:V:m:s:S:v";
char * options = "L:p:B:l:i:wmet:h:V:v";

/*
* Setting standard values for DDalphaAMG_init
*/
void standard_init() {
init.global_lattice[T] = 8;
init.global_lattice[Z] = 8;
init.global_lattice[Y] = 8;
init.global_lattice[X] = 8;
coarsest_local_lattice[T] = 1;
coarsest_local_lattice[Z] = 1;
coarsest_local_lattice[Y] = 1;
coarsest_local_lattice[X] = 1;

init.kappa = 0.142857143;
init.mu = 0.1;
init.csw = 1;
init.mu = 0.;
init.csw = 0.;

init.bc = 1;

init.number_of_levels = 3;

init.procs[T] = 2;
init.procs[T] = 1;
init.procs[Z] = 1;
init.procs[Y] = 1;
init.procs[X] = 1;

init.number_openmp_threads = 2;
init.number_openmp_threads = 1;

init.block_lattice[T]=2;
init.block_lattice[Z]=2;
@@ -83,24 +85,17 @@ void help( char * arg0 ) {
static int printed = 0;
if(!printed) {
printf0("\n\n");
printf0("Usage: %s -c <conf> [<option(s)>]\n", arg0);
printf0(" -c PATH Configuration to load\n");
printf0(" -f # Configuration format (0 -> DDalphaAMG, 1 -> Lime)\n");
printf0(" -i PATH Input file (optional)\n");
printf0(" -L T Y X Z Lattice size in each direction\n");
printf0(" -p T Y X Z Processors in each direction\n");
printf0(" -B T Y X Z Block size in each direction on first level.\n");
printf0(" -k # kappa for the configuration\n");
printf0(" -w # c_sw for the configuration\n");
printf0(" -u # mu for the configuration\n");
printf0(" -t # Number of OpenMp threads\n");
printf0(" -r # Relative residual\n");
printf0(" -K # K-cycle tolerance\n");
printf0(" -C # Tolerance on coarsest grid\n");
printf0("Usage: %s [<option(s)>]\n", arg0);
printf0(" -L T Z Y X Local lattice size on the coarsest level in each direction\n");
printf0(" -p T Z Y X Processors in each direction\n");
printf0(" -B T Z Y X Block size in each direction on first level. A block size of (2 2 2 2) is used for the following levels.\n");
printf0(" -l # Number of levels, l (from 1 to 4)\n");
printf0(" -V 1 [2] [3] Basis vectors between each level (l-1)\n");
printf0(" -m 2 [3] [4] Factor for mu on coarse levels\n");
printf0(" -s 1 [2] [3] Setup iterations on each level (l-1)\n");
printf0(" -i # Number of iterations\n");
printf0(" -w use c_sw term (without the coarse self-coupling is diagonal)\n");
printf0(" -m use twisted mass term (without the coarse self-coupling is hermitian)\n");
printf0(" -e use ND twisted mass term (this double the size of the vectors)\n");
printf0(" -t # Number of OpenMp threads\n");
printf0(" -V 1 [2] [3] Basis vectors between each level (l-1 numbers)\n");
printf0(" -v Verbose\n");
}
printf0("\n\n");
@@ -116,15 +111,6 @@ void read_init_arg(int argc, char *argv[] ) {
optind = 0;
while ((opt = getopt(argc, argv, options)) != -1) {
switch (opt) {
case 'c':
conf_file = optarg;
break;
case 'f':
conf_format = atoi(optarg);
break;
case 'i':
init.init_file = optarg;
break;
case 'L':
optind--;
mu=0;
@@ -135,7 +121,7 @@ void read_init_arg(int argc, char *argv[] ) {
fail++;
break;
}
init.global_lattice[mu] = atoi(argv[optind]);
coarsest_local_lattice[mu] = atoi(argv[optind]);
mu++;
}
if(mu < 4) {
@@ -183,14 +169,14 @@ void read_init_arg(int argc, char *argv[] ) {
case 'l':
init.number_of_levels = atoi(optarg);
break;
case 'k':
init.kappa = atof(optarg);
case 'i':
iters = atoi(optarg);
break;
case 'w':
init.csw = atof(optarg);
init.csw = 1.;
break;
case 'u':
init.mu = atof(optarg);
case 'm':
init.mu = 1.;
break;
case 't':
init.number_openmp_threads = atoi(optarg);
@@ -203,12 +189,6 @@ void read_init_arg(int argc, char *argv[] ) {
break;
}
}

if(conf_file == NULL) {
printf0("Error: configuration file is missing (use -c PATH).\n");
p++;
fail++;
}

if(p) {
help(argv[0]);
@@ -224,15 +204,6 @@ void read_params_arg(int argc, char *argv[] ) {
optind = 0;
while ((opt = getopt(argc, argv, options)) != -1) {
switch (opt) {
case 'r':
residual = atof(optarg);
break;
case 'K':
params.kcycle_tolerance = atof(optarg);
break;
case 'C':
params.coarse_tolerance = atof(optarg);
break;
case 'V':
optind--;
mu=0;
@@ -247,50 +218,17 @@ void read_params_arg(int argc, char *argv[] ) {
mu++;
}
break;
case 'm':
optind--;
params.mu_factor[0]=1;
mu=1;
for ( ; optind < argc && *argv[optind] != '-'; optind++){
if(mu > 3) {
printf0("Error: too many arguments in -m.\n");
p++;
fail++;
break;
}
params.mu_factor[mu] = atof(argv[optind]);
mu++;
}
break;
case 's':
optind--;
mu=0;
for ( ; optind < argc && *argv[optind] != '-'; optind++){
if(mu > 2) {
printf0("Error: too many arguments in -s.\n");
p++;
fail++;
break;
}
params.setup_iterations[mu] = atoi(argv[optind]);
mu++;
}
case 'e':
params.epsbar = 1;
break;
case 'v':
params.print = 1;
break;
case '?':
default:
break;
}
}

if(conf_file == NULL) {
printf0("Error: configuration file is missing (use -c PATH).\n");
p++;
fail++;
}

if(p) {
help(argv[0]);
MPI_Abort(MPI_COMM_WORLD,0);
@@ -300,24 +238,37 @@ void read_params_arg(int argc, char *argv[] ) {
}

int main( int argc, char *argv[] ) {


MPI_Init( &argc, &argv );
MPI_Comm_rank( MPI_COMM_WORLD, &rank );

standard_init();
read_init_arg(argc, argv);

for(int mu=0; mu<4; mu++) {
init.global_lattice[mu] = coarsest_local_lattice[mu]*init.procs[mu]*init.block_lattice[mu];
for(int l=0; l<init.number_of_levels-2; l++) {
init.global_lattice[mu] *= 2;
}
}
printf0("global size: %d %d %d %d\n",init.global_lattice[T],init.global_lattice[X],init.global_lattice[Y],init.global_lattice[Z]);

printf0("Running initialization...\n");
DDalphaAMG_initialize( &init, &params, &status );
printf0("Initialized %d levels in %.2f sec\n", status.success, status.time);

int nlvl = status.success;
read_params_arg(argc, argv);

comm_cart = DDalphaAMG_get_communicator();
MPI_Comm_rank( comm_cart, &rank );

// we don't want to waste time with the setup
for(int l=0; l<nlvl; l++) {
params.setup_iterations[l]=0;
params.smoother_iterations=0;
}

printf0("Running updating\n");
DDalphaAMG_update_parameters( &params, &status );
if (status.success)
@@ -330,83 +281,92 @@ int main( int argc, char *argv[] ) {
double *gauge_field;
int vol = init.global_lattice[T] * init.global_lattice[X] * init.global_lattice[Y] *
init.global_lattice[Z] / init.procs[T] / init.procs[X] / init.procs[Y] / init.procs[Z];
gauge_field = (double *) malloc(18*4*vol*sizeof(double));
gauge_field = (double *) calloc(18*4*vol,sizeof(double));

printf0("Reading config.\n");
DDalphaAMG_read_configuration( gauge_field, conf_file, conf_format, &status );
printf0("Reading configuration time %.2f sec\n", status.time);
printf0("Desired plaquette %.13lf\n", status.info);
printf0("Using unitary config config.\n");
for(int i=0; i<4*vol; i++) {
gauge_field[i*18+0] = 1.;
gauge_field[i*18+8] = 1.;
gauge_field[i*18+16] = 1.;
}

printf0("Setting config.\n");
DDalphaAMG_set_configuration( gauge_field, &status );
printf0("Setting configuration time %.2f sec\n", status.time);
printf0("Computed plaquette %.13lf\n", status.info);

free(gauge_field);

printf0("Running setup\n");
DDalphaAMG_setup( &status );
printf0("Run %d setup iterations in %.2f sec (%.1f %% on coarse grid)\n", status.success,
status.time, 100.*(status.coarse_time/status.time));
printf0("Total iterations on fine grid %d\n", status.iter_count);
printf0("Total iterations on coarse grids %d\n", status.coarse_iter_count);

/*
* Defining fine and coarse vector randomly.
*/
double *vector1[nlvl], *vector2[nlvl];
int vols[nlvl], vars[nlvl];
vols[0]=vol;
vars[0]=3*4*2;

for ( int i=1; i<nlvl; i++ ) {
vols[i] = vols[i-1] / params.block_lattice[i-1][T] / params.block_lattice[i-1][X] / params.block_lattice[i-1][Y] / params.block_lattice[i-1][Z];
vars[i]=params.mg_basis_vectors[i-1]*2*2; // a factor of 2 is for the spin, the other for the complex
}
float *vector1[nlvl], *vector2[nlvl];

for ( int i=0; i<nlvl; i++ ) {
vector1[i] = (double *) malloc(vars[i]*vols[i]*sizeof(double));
vector2[i] = (double *) malloc(vars[i]*vols[i]*sizeof(double));
vector1[i] = DDalphaAMG_coarse_vector_alloc(i);
vector2[i] = DDalphaAMG_coarse_vector_alloc(i);
DDalphaAMG_coarse_vector_rand(vector1[i], i);
}

for ( int i=0; i<nlvl; i++ )
for ( int j=0; j<vars[i]*vols[i]; j++ )
vector1[i][j] = ((double)rand()/(double)RAND_MAX)-0.5;


for ( int i=1; i<nlvl; i++ ) {
printf0("Testing RP=1 on level %d\n",i);
printf0("\nTesting RP=1 on level %d\n",i);
DDalphaAMG_prolongate(vector2[i-1], vector1[i], i-1, &status);
DDalphaAMG_restrict(vector2[i], vector2[i-1], i-1, &status);

double num=0, den=0;
for ( int j=0; j<vars[i]*vols[i]; j++ ) {
vector2[i][j] -= vector1[i][j];
num += vector2[i][j]*vector2[i][j];
den += vector1[i][j]*vector1[i][j];
}
printf0("Restult (1-RP)v = %e\n\n", num/den);
float res = DDalphaAMG_coarse_vector_residual(vector2[i], vector1[i], i);
printf0("Result (1-RP)v = %e\n", res);
}

for ( int i=1; i<nlvl; i++ ) {
printf0("Testing coarse operator on level %d\n",i);
printf0("\nTesting coarse operator on level %d\n",i);
DDalphaAMG_apply_coarse_operator(vector2[i], vector1[i], i, &status);

DDalphaAMG_prolongate(vector1[i-1], vector1[i], i-1, &status);
DDalphaAMG_apply_coarse_operator(vector2[i-1], vector1[i-1], i-1, &status);
DDalphaAMG_restrict(vector2[i], vector2[i-1], i-1, &status);
DDalphaAMG_restrict(vector1[i], vector2[i-1], i-1, &status);

float res = DDalphaAMG_coarse_vector_residual(vector2[i], vector1[i], i);
printf0("Result (D_c-RDP)v = %e\n", res);

DDalphaAMG_apply_coarse_operator(vector1[i], vector1[i], i, &status);
// restoring vector1[i]
DDalphaAMG_restrict(vector1[i], vector1[i-1], i-1, &status);
}

double num=0, den=0;
for ( int j=0; j<vars[i]*vols[i]; j++ ) {
vector2[i][j] -= vector1[i][j];
num += vector2[i][j]*vector2[i][j];
den += vector1[i][j]*vector1[i][j];
printf0("\nStarting bechmarks:\n");
for ( int i=1; i<nlvl; i++ ) {
double time = 0;
for ( int j=0; j<iters; j++ ) {
DDalphaAMG_prolongate(vector1[i-1], vector1[i], i-1, &status);
time += status.time;
}
printf0("Level %d, prolongation from level %d to level %d: %e averaged over %d iters\n", i, i, i-1, time/iters, iters);
time = 0;
for ( int j=0; j<iters; j++ ) {
DDalphaAMG_restrict(vector1[i], vector1[i-1], i-1, &status);
time += status.time;
}
printf0("Level %d, restriction from level %d to level %d: %e averaged over %d iters\n", i, i-1, i, time/iters, iters);
time = 0;
for ( int j=0; j<iters; j++ ) {
DDalphaAMG_apply_coarse_operator(vector1[i], vector1[i], i, &status);
time += status.time;
}
printf0("Restult (D_c-RDP)v = %e\n\n", num/den);
printf0("Level %d, coarse operator on level %d: %e averaged over %d iters\n", i, i, time/iters, iters);
}


for ( int i=0; i<nlvl; i++ ) {
DDalphaAMG_coarse_vector_free(vector1[i], i);
DDalphaAMG_coarse_vector_free(vector2[i], i);
}

// free(vector_in);
// free(vector_out);
//free(gauge_field);
// DDalphaAMG_finalize();
DDalphaAMG_finalize();
MPI_Finalize();
return 0;
}