Merge branch 'deepmodeling:develop' into cusolver

deepmodeling · Nov 4, 2023 · c5732ad · c5732ad
2 parents a70814a + dbb28dc
commit c5732ad
Show file tree

Hide file tree

Showing 49 changed files with 6,210 additions and 5,420 deletions.
diff --git a/docs/advanced/input_files/input-main.md b/docs/advanced/input_files/input-main.md
@@ -88,6 +88,7 @@
     - [emin\_sto](#emin_sto)
     - [emax\_sto](#emax_sto)
     - [seed\_sto](#seed_sto)
+    - [initsto\_ecut](#initsto_ecut)
     - [initsto\_freq](#initsto_freq)
     - [npart\_sto](#npart_sto)
   - [Geometry relaxation](#geometry-relaxation)
@@ -338,7 +339,7 @@
     - [test\_skip\_ewald](#test_skip_ewald)
   - [Electronic conductivities](#electronic-conductivities)
     - [cal\_cond](#cal_cond)
-    - [cond\_nche](#cond_nche)
+    - [cond\_che\_thr](#cond_che_thr)
     - [cond\_dw](#cond_dw)
     - [cond\_wcut](#cond_wcut)
     - [cond\_dt](#cond_dt)
@@ -1119,6 +1120,14 @@ These variables are used to control the parameters of stochastic DFT (SDFT),  mi
   - -1: the seed is decided by time(NULL).
 - **Default**: 0
 
+### initsto_ecut
+
+- **Type**: Real
+- **Availability**: [esolver_type](#esolver_type) = `sdft`
+- **Description**: Stochastic wave functions are initialized in a large box generated by "4*`initsto_ecut`". `initsto_ecut` should be larger than [ecutwfc](#ecutwfc). With this method, SDFT results are the same when running on different numbers of cores. In addition, the coefficients of the same G-vector remain the same as `ecutwfc` is increased up to `initsto_ecut`. If `initsto_ecut` is smaller than [ecutwfc](#ecutwfc), this method is turned off.
+- **Default**: 0.0
+- **Unit**: Ry
+
 ### initsto_freq
 
 - **Type**: Integer
@@ -1131,8 +1140,8 @@ These variables are used to control the parameters of stochastic DFT (SDFT),  mi
 ### npart_sto
 
 - **Type**: Integer
-- **Availability**: [method_sto](#method_sto) = `2` and [out_dos](#out_dos) = `True`
-- **Description**: Make memory cost to 1/npart_sto times of the previous one when running the post process of SDFT like DOS.
+- **Availability**: [method_sto](#method_sto) = `2` and [out_dos](#out_dos) = `True` or [cal_cond](#cal_cond) = `True`
+- **Description**: Reduce the memory cost to 1/npart_sto of the original when running SDFT post-processing such as DOS or conductivity calculations.
 - **Default**: 1
 
 [back to top](#full-list-of-input-keywords)
@@ -3130,12 +3139,12 @@ Thermal conductivities: $\kappa = \lim_{\omega\to 0}\kappa(\omega)$.
 - **Description**: Whether to calculate electronic conductivities.
 - **Default**: False
 
-### cond_nche
+### cond_che_thr
 
-- **Type**: Integer
+- **Type**: Real
 - **Availability**: [esolver_type](#esolver_type) = `sdft`
-- **Description**: Chebyshev expansion orders for stochastic Kubo Greenwood.
-- **Default**: 20
+- **Description**: Error threshold that controls the accuracy of the Chebyshev expansions used for conductivities.
+- **Default**: 1e-8
 
 ### cond_dw
 

diff --git a/source/module_base/global_function.h b/source/module_base/global_function.h
@@ -336,10 +336,11 @@ static inline void FREE_MUL_PTR(T_element* v, const T_N_first N_first, const T_N
 	v = nullptr;
 }
 
-double ddot_real(
+template <typename T>
+T ddot_real(
         const int & dim,
-        const std::complex<double>* psi_L,
-        const std::complex<double>* psi_R,
+        const std::complex<T>* psi_L,
+        const std::complex<T>* psi_R,
         const bool reduce = true) ;
 
 //==========================================================

diff --git a/source/module_base/global_function_ddotreal.cpp b/source/module_base/global_function_ddotreal.cpp
@@ -1,38 +1,45 @@
-#include "global_function.h"
 #include "blas_connector.h"
+#include "global_function.h"
 #include "module_base/parallel_reduce.h"
 
 namespace ModuleBase
 {
 namespace GlobalFunc
 {
-    double ddot_real
-    (
-        const int &dim,
-        const std::complex<double>* psi_L,
-        const std::complex<double>* psi_R,
-        const bool reduce
-    )
+
+template double ddot_real(const int& dim,
+                          const std::complex<double>* psi_L,
+                          const std::complex<double>* psi_R,
+                          const bool reduce);
+template float ddot_real(const int& dim,
+                         const std::complex<float>* psi_L,
+                         const std::complex<float>* psi_R,
+                         const bool reduce);
+
+template <typename T>
+T ddot_real(const int& dim, const std::complex<T>* psi_L, const std::complex<T>* psi_R, const bool reduce)
+{
+    //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
+    // qianrui modify 2021-3-14
+    // Note that  ddot_(2*dim,a,1,b,1) = REAL( zdotc_(dim,a,1,b,1) )
+    int dim2 = 2 * dim;
+    T *pL, *pR;
+    pL = (T*)psi_L;
+    pR = (T*)psi_R;
+    T result = BlasConnector::dot(dim2, pL, 1, pR, 1);
+    if (reduce)
+        Parallel_Reduce::reduce_pool(result);
+    return result;
+    //======================================================================
+    /*std::complex<double> result(0,0);
+    for (int i=0;i<dim;i++)
     {
-        //<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-        //qianrui modify 2021-3-14
-        //Note that  ddot_(2*dim,a,1,b,1) = REAL( zdotc_(dim,a,1,b,1) )
-        int dim2=2*dim;
-        double *pL,*pR;
-        pL=(double *)psi_L;
-        pR=(double *)psi_R;
-        double result=BlasConnector::dot(dim2,pL,1,pR,1);
-        if (reduce)  Parallel_Reduce::reduce_pool(result);
-        return result;
-        //======================================================================
-        /*std::complex<double> result(0,0);
-        for (int i=0;i<dim;i++)
-        {
-            result += conj( psi_L[i] ) * psi_R[i];
-        }
-        Parallel_Reduce::reduce_complex_double_pool( result );
-        return result.real();*/
-        //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+        result += conj( psi_L[i] ) * psi_R[i];
     }
+    Parallel_Reduce::reduce_complex_double_pool( result );
+    return result.real();*/
+    //>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
 }
-}
+
+} // namespace GlobalFunc
+} // namespace ModuleBase
diff --git a/source/module_base/math_chebyshev_def.h b/source/module_base/math_chebyshev_def.h
@@ -434,7 +434,7 @@ void Chebyshev<REAL>::recurs_complex(
 	{
     	for(int i = 0; i < N; ++i)
     	{
-        	arraynp1[i+ib*LDA]=2.0*arraynp1[i+ib*LDA]-arrayn_1[i+ib*LDA];
+        	arraynp1[i+ib*LDA]=REAL(2.0)*arraynp1[i+ib*LDA]-arrayn_1[i+ib*LDA];
     	}
 	}
 }

diff --git a/source/module_base/module_mixing/broyden_mixing.h b/source/module_base/module_mixing/broyden_mixing.h
@@ -277,7 +277,6 @@ class Broyden_Mixing : public Mixing
         }
         else
         {
-            beta(0, 0) = inner_product(FP_dF, FP_dF);
             coef[0] = 1.0;
         }
 

diff --git a/source/module_basis/module_pw/pw_basis_k.cpp b/source/module_basis/module_pw/pw_basis_k.cpp
@@ -295,9 +295,9 @@ ModuleBase::Vector3<double>& PW_Basis_K::getgcar(const int ik, const int igl) co
 ModuleBase::Vector3<double> PW_Basis_K::getgdirect(const int ik, const int igl) const
 {
     ModuleBase::Vector3<double> f = this->latvec * this->gcar[ik * this->npwk_max + igl];
-    f.x = floor(f.x+0.1);
-    f.y = floor(f.y+0.1);
-    f.z = floor(f.z+0.1);
+    f.x = std::round(f.x);
+    f.y = std::round(f.y);
+    f.z = std::round(f.z);
     return f;
 }
 

diff --git a/source/module_cell/module_symmetry/symmetry.cpp b/source/module_cell/module_symmetry/symmetry.cpp
@@ -1627,6 +1627,7 @@ for (int g_index = 0; g_index < group_index; g_index++)
     delete[] isymflag;
     delete[] table_xyz;
     delete[] invmap;
+    delete[] count_xyz;
     ModuleBase::timer::tick("Symmetry","rhog_symmetry");
 }
 

diff --git a/source/module_esolver/esolver_ks_pw.cpp b/source/module_esolver/esolver_ks_pw.cpp
@@ -108,6 +108,7 @@ ESolver_KS_PW<T, Device>::~ESolver_KS_PW()
         delete this->psi_init;
         this->psi_init = nullptr;
     }
+    delete this->psi;
 }
 template <typename T, typename Device>
 void ESolver_KS_PW<T, Device>::Init_GlobalC(Input& inp, UnitCell& cell)

diff --git a/source/module_esolver/esolver_ks_pw_tool.cpp b/source/module_esolver/esolver_ks_pw_tool.cpp
@@ -55,7 +55,8 @@ void ESolver_KS_PW<T, Device>::KG(const double fwhmin,
     ModuleBase::GlobalFunc::ZEROS(ct22, nt);
 
     hamilt::Velocity velop(this->pw_wfc, this->kv.isk.data(), &GlobalC::ppcell, &GlobalC::ucell, INPUT.cond_nonlocal);
-    double decut = (wcut + 5*fwhmin)  / ModuleBase::Ry_to_eV;
+    double decut = (wcut + fwhmin)  / ModuleBase::Ry_to_eV;
+    std::cout<<"Recommended dt: "<<0.25*M_PI/decut<<" a.u."<<std::endl;
     for (int ik = 0; ik < nk; ++ik)
     {
         velop.init(ik);
@@ -249,11 +250,12 @@ void ESolver_KS_PW<T, Device>::calcondw(const int nt,
         ofscond << std::setw(8) << (iw + 0.5) * dw * ModuleBase::Ry_to_eV << std::setw(20) << cw11[iw] << std::setw(20)
                 << kappa[iw] << std::setw(20) << cw12[iw] << std::setw(20) << cw22[iw] << std::endl;
     }
-    std::cout << std::setprecision(6) << "DC electrical conductivity: " << cw11[0] - (cw11[1] - cw11[0]) * 0.5
-              << " Sm^-1" << std::endl;
-    std::cout << std::setprecision(6) << "Thermal conductivity: " << kappa[0] - (kappa[1] - kappa[0]) * 0.5
-              << " W(mK)^-1" << std::endl;
-    ;
+    double sigma0 = cw11[0] - (cw11[1] - cw11[0]) * 0.5;
+	double kappa0 = kappa[0] - (kappa[1] - kappa[0]) * 0.5;
+    double Lorent0 = kappa0 / sigma0 / Occupy::gaussian_parameter / ModuleBase::Ry_to_eV / 11604.518026 * pow(1.6021766208e-19/1.3806505e-23, 2);
+    std::cout << std::setprecision(6) << "DC electrical conductivity: " << sigma0 << " Sm^-1" << std::endl;
+    std::cout << std::setprecision(6) << "Thermal conductivity: " << kappa0 << " W(mK)^-1" << std::endl;
+    std::cout << std::setprecision(6) << "Lorenz number: "<<Lorent0<<" k_B^2/e^2"<<std::endl;
     ofscond.close();
 
     delete[] cw11;
-Original file line number
+Diff line change
@@ Expand Up / @@ -434,7 +434,7 @@ void Chebyshev<REAL>::recurs_complex( @@
     	{
         	for(int i = 0; i < N; ++i)
         	{
-            	arraynp1[i+ib*LDA]=2.0*arraynp1[i+ib*LDA]-arrayn_1[i+ib*LDA];
+            	arraynp1[i+ib*LDA]=REAL(2.0)*arraynp1[i+ib*LDA]-arrayn_1[i+ib*LDA];
         	}
     	}
     }
@@ Expand Down @@