Update the Intel compiler flags and fix bugs related to the halo code and to the single-precision checks. The FFT ph deallocation is an issue only with the Intel compiler.

rfj82982 · rfj82982 · commit 232de050eda5 · 2024-03-11T14:32:42.000Z
diff --git a/cmake/D2D_MPI.cmake b/cmake/D2D_MPI.cmake
@@ -58,6 +58,7 @@ if (MPI_FOUND)
     # Force the mpirun to be coherent with the mpifortran
     string(REGEX REPLACE "mpif90" "mpirun" PATH_TO_MPIRUN "${MPI_Fortran_COMPILER}")
     string(REPLACE "mpiifort" "mpirun" PATH_TO_MPIRUN "${PATH_TO_MPIRUN}")
+    string(REPLACE "mpiifx" "mpirun" PATH_TO_MPIRUN "${PATH_TO_MPIRUN}")
     message(STATUS "Path to mpirun ${PATH_TO_MPIRUN}")
     set(MPIEXEC_EXECUTABLE "${PATH_TO_MPIRUN}" CACHE STRING
         "Force MPIRUN to be consistent with MPI_Fortran_COMPILER" FORCE)
diff --git a/cmake/compilers/D2D_flags_intel.cmake b/cmake/compilers/D2D_flags_intel.cmake
@@ -1,8 +1,10 @@
 # Compilers Flags for Intel
 # Check is the compiler is the new ifx based on LLVM or the old ifort
 if (Fortran_COMPILER_NAME MATCHES "IntelLLVM")
-  set(D2D_FFLAGS "-fpp -std08")
-  set(D2D_FFLAGS_RELEASE "-O2")
+  set(D2D_FFLAGS "-fpp -std08 -safe-cray-ptr -g -traceback")
+  set(D2D_FFLAGS_RELEASE "-O3")
+  #set(D2D_FFLAGS "-fpp -std08 -xHost -heaparrays -safe-cray-ptr -g -traceback")
+  #set(D2D_FFLAGS_RELEASE "-O3 -ipo")
 else (Fortran_COMPILER_NAME MATCHES "IntelLLVM")
   #set(CMAKE_Fortran_FLAGS "-cpp xSSE4.2 -axAVX,CORE-AVX-I,CORE-AVX2 -ipo -fp-model fast=2 -mcmodel=large -safe-cray-ptr")
   set(D2D_FFLAGS "-fpp -std08 -xHost -heaparrays -safe-cray-ptr -g -traceback")
diff --git a/examples/fft_physical_x/fft_c2c_x.f90 b/examples/fft_physical_x/fft_c2c_x.f90
@@ -35,7 +35,7 @@ program fft_c2c_x
 #ifdef DOUBLE_PREC
    real(mytype), parameter :: error_precision = 1.e-12_mytype
 #else
-   real(mytype), parameter :: error_precision = 1.e-6_mytype
+   real(mytype), parameter :: error_precision = 5.e-6_mytype
 #endif
    
 
diff --git a/examples/fft_physical_x/fft_grid_x.f90 b/examples/fft_physical_x/fft_grid_x.f90
@@ -35,7 +35,7 @@ program fft_physical_x
 #ifdef DOUBLE_PREC
    real(mytype), parameter :: error_precision = 1.e-12_mytype
 #else
-   real(mytype), parameter :: error_precision = 1.e-6_mytype
+   real(mytype), parameter :: error_precision = 5.e-6_mytype
 #endif
    
 
diff --git a/examples/fft_physical_x/fft_r2c_x.f90 b/examples/fft_physical_x/fft_r2c_x.f90
@@ -36,7 +36,7 @@ program fft_r2c_x
 #ifdef DOUBLE_PREC
    real(mytype), parameter :: error_precision = 1.e-12_mytype
 #else
-   real(mytype), parameter :: error_precision = 1.e-6_mytype
+   real(mytype), parameter :: error_precision = 5.e-6_mytype
 #endif
    
 
diff --git a/examples/fft_physical_z/fft_c2c_z.f90 b/examples/fft_physical_z/fft_c2c_z.f90
@@ -35,7 +35,7 @@ program fft_c2c_z
 #ifdef DOUBLE_PREC
    real(mytype), parameter :: error_precision = 1.e-12_mytype
 #else
-   real(mytype), parameter :: error_precision = 1.e-6_mytype
+   real(mytype), parameter :: error_precision = 5.e-6_mytype
 #endif
    
 
diff --git a/examples/fft_physical_z/fft_r2c_z.f90 b/examples/fft_physical_z/fft_r2c_z.f90
@@ -36,7 +36,7 @@ program fft_r2c_z
 #ifdef DOUBLE_PREC
    real(mytype), parameter :: error_precision = 1.e-12_mytype
 #else
-   real(mytype), parameter :: error_precision = 1.e-6_mytype
+   real(mytype), parameter :: error_precision = 5.e-6_mytype
 #endif
    
 
diff --git a/examples/halo_test/halo_test.f90 b/examples/halo_test/halo_test.f90
@@ -533,7 +533,7 @@ subroutine check_err(divh, divref, pencil)
       !$acc end kernels
       divmag = mag(tmp)
 
-      if (error < epsilon(divmag) * divmag) then
+      if (error < real(2.0,mytype) * epsilon(divmag) * divmag) then
          passing = .true.
       else
          passing = .false.
diff --git a/src/fft_common.f90 b/src/fft_common.f90
@@ -186,7 +186,7 @@ subroutine decomp_2d_fft_finalize
 
    if (nx_fft /= nx_global .or. ny_fft /= ny_global .or. nz_fft /= nz_global) then
       call decomp_info_finalize(ph)
-      deallocate (ph)
+      !deallocate (ph)
    end if
    nullify (ph)
    call decomp_info_finalize(sp)
diff --git a/src/halo_common.f90 b/src/halo_common.f90
@@ -140,7 +140,7 @@
           !$acc end kernels
        else if (ipencil == 3) then
           jst = decomp%zst(2); jen = decomp%zen(2)
-          ist = decomp%xst(1); ien = decomp%xen(1)
+          ist = decomp%zst(1); ien = decomp%zen(1)
           !$acc kernels default(present)
           do k = 1, s3  ! z all local
              do j = jst, jen