We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
It seems like the Ax and Ay arrays are somehow mixed up in GPUE, and the current master branch does not evolve correctly with rotation.
Ax
Ay
The following apply_gauge(...) function works:
apply_gauge(...)
// 3D void apply_gauge(Grid &par, double2 *wfc, double2 *Ax, double2 *Ay, double2 *Az, double renorm_factor_x, double renorm_factor_y, double renorm_factor_z, bool flip, cufftHandle plan_1d, cufftHandle plan_dim2, cufftHandle plan_dim3, double dx, double dy, double dz, double time, int yDim, int size){ dim3 grid = par.grid; dim3 threads = par.threads; if (flip){ // 1d forward / mult by Ay cufftHandleError( cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_FORWARD) ); scalarMult<<<grid,threads>>>(wfc, renorm_factor_z, wfc); cudaCheckError(); if(par.bval("Ay_time")){ EqnNode_gpu* Ay_eqn = par.astval("Ay"); int e_num = par.ival("Ay_num"); ast_cmult<<<grid,threads>>>(wfc, wfc, Ay_eqn, dx, dy, dz, time, e_num); cudaCheckError(); } else{ cMult<<<grid,threads>>>(wfc, (cufftDoubleComplex*) Ay, wfc); cudaCheckError(); } cufftHandleError( cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_INVERSE) ); scalarMult<<<grid,threads>>>(wfc, renorm_factor_z, wfc); cudaCheckError(); // loop to multiply by Ax for (int i = 0; i < yDim; i++){ cufftHandleError( cufftExecZ2Z(plan_dim2, &wfc[i*size], &wfc[i*size], CUFFT_FORWARD) ); } scalarMult<<<grid,threads>>>(wfc, renorm_factor_y, wfc); cudaCheckError(); if(par.bval("Ax_time")){ EqnNode_gpu* Ax_eqn = par.astval("Ax"); int e_num = par.ival("Ax_num"); ast_cmult<<<grid,threads>>>(wfc, wfc, Ax_eqn, dx, dy, dz, time, e_num); cudaCheckError(); } else{ cMult<<<grid,threads>>>(wfc, (cufftDoubleComplex*) Ax, wfc); cudaCheckError(); } for (int i = 0; i < yDim; i++){ //size = xDim * zDim; cufftHandleError( cufftExecZ2Z(plan_dim2, &wfc[i*size], &wfc[i*size], CUFFT_INVERSE) ); } scalarMult<<<grid,threads>>>(wfc, renorm_factor_y, wfc); cudaCheckError(); // 1D FFT to Az cufftHandleError( cufftExecZ2Z(plan_dim3, wfc, wfc, CUFFT_FORWARD) ); scalarMult<<<grid,threads>>>(wfc, renorm_factor_x, wfc); cudaCheckError(); if(par.bval("Az_time")){ EqnNode_gpu* Az_eqn = par.astval("Az"); int e_num = par.ival("Az_num"); ast_cmult<<<grid,threads>>>(wfc, wfc, Az_eqn, dx, dy, dz, time, e_num); cudaCheckError(); } else{ cMult<<<grid,threads>>>(wfc, (cufftDoubleComplex*) Az, wfc); cudaCheckError(); } cufftHandleError( cufftExecZ2Z(plan_dim3, wfc, wfc, CUFFT_INVERSE) ); scalarMult<<<grid,threads>>>(wfc, renorm_factor_x, wfc); cudaCheckError(); } else{ // 1D FFT to Az cufftHandleError( cufftExecZ2Z(plan_dim3, wfc, wfc, CUFFT_FORWARD) ); scalarMult<<<grid,threads>>>(wfc, renorm_factor_z, wfc); cudaCheckError(); if(par.bval("Az_time")){ EqnNode_gpu* Az_eqn = par.astval("Az"); int e_num = par.ival("Az_num"); ast_cmult<<<grid,threads>>>(wfc, wfc, Az_eqn, dx, dy, dz, time, e_num); cudaCheckError(); } else{ cMult<<<grid,threads>>>(wfc, (cufftDoubleComplex*) Az, wfc); cudaCheckError(); } cufftHandleError( cufftExecZ2Z(plan_dim3, wfc, wfc, CUFFT_INVERSE) ); scalarMult<<<grid,threads>>>(wfc, renorm_factor_z, wfc); cudaCheckError(); // loop to multiply by Ax for (int i = 0; i < yDim; i++){ cufftHandleError( cufftExecZ2Z(plan_dim2, &wfc[i*size], &wfc[i*size], CUFFT_FORWARD) ); } scalarMult<<<grid,threads>>>(wfc, renorm_factor_x, wfc); cudaCheckError(); if(par.bval("Ax_time")){ EqnNode_gpu* Ax_eqn = par.astval("Ax"); int e_num = par.ival("Ax_num"); ast_cmult<<<grid,threads>>>(wfc, wfc, Ax_eqn, dx, dy, dz, time, e_num); cudaCheckError(); } else{ cMult<<<grid,threads>>>(wfc, (cufftDoubleComplex*) Ax, wfc); cudaCheckError(); } for (int i = 0; i < yDim; i++){ //size = xDim * zDim; cufftHandleError( cufftExecZ2Z(plan_dim2, &wfc[i*size], &wfc[i*size], CUFFT_INVERSE) ); } scalarMult<<<grid,threads>>>(wfc, renorm_factor_x, wfc); cudaCheckError(); // 1d forward / mult by Ay cufftHandleError( cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_FORWARD) ); scalarMult<<<grid,threads>>>(wfc, renorm_factor_y, wfc); cudaCheckError(); if(par.bval("Ay_time")){ EqnNode_gpu* Ay_eqn = par.astval("Ay"); int e_num = par.ival("Ay_num"); ast_cmult<<<grid,threads>>>(wfc, wfc, Ay_eqn, dx, dy, dz, time, e_num); cudaCheckError(); } else{ cMult<<<grid,threads>>>(wfc, (cufftDoubleComplex*) Ay, wfc); cudaCheckError(); } cufftHandleError( cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_INVERSE) ); scalarMult<<<grid,threads>>>(wfc, renorm_factor_y, wfc); cudaCheckError(); } }
Note that the dim2 plan is for some reason for the Ax operator instead of Ay. I am looking into this now.
dim2
These changes are on the 3d_rot_fix branch.
3d_rot_fix
The text was updated successfully, but these errors were encountered:
No branches or pull requests
It seems like the
Ax
andAy
arrays are somehow mixed up in GPUE, and the current master branch does not evolve correctly with rotation.The following
apply_gauge(...)
function works:Note that the
dim2
plan is for some reason for the Ax operator instead of Ay. I am looking into this now.These changes are on the
3d_rot_fix
branch.The text was updated successfully, but these errors were encountered: