diff --git a/ext/ext/ck3.py b/ext/ext/ck3.py index aaa47dba1..f3ee1e7b9 100644 --- a/ext/ext/ck3.py +++ b/ext/ext/ck3.py @@ -1,5 +1,12 @@ #!/usr/bin/env python3 +''' +python3 this_script.py -c y3/config.yaml + +or + +python3 this_script.py y3/config.yaml +''' import os import re @@ -14,7 +21,7 @@ ######################################################################## -alphabets = { +rc_alphabets = { '0' : 'a', '1' : 'b', '2' : 'c', '3' : 'd', '4' : 'e', '5' : 'f', '6' : 'g', '7' : 'h', '8' : 'i', '9' : 'j', '10': 'k', '11': 'l', '12': 'm', '13': 'n', '14': 'o', @@ -22,7 +29,6 @@ '20': 'u', '21': 'v', '22': 'w', '23': 'x', '24': 'y', '25': 'z'} - rc_kernel2c = ''' TEMPLATE_PARAMS \ __global__ \ @@ -40,31 +46,24 @@ USING_DEVICE_VARIABLES KERNEL_CONSTEXPR_FLAGS \ const int ithread = threadIdx.x + blockIdx.x * blockDim.x; - DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL DECLARE_FORCE_I_AND_K DECLARE_PARAMS_I_AND_K - for (int ii = ithread; ii < nexclude; ii += blockDim.x * gridDim.x) { KERNEL_SCALED_KLANE KERNEL_ZERO_LOCAL_FORCE - int i = exclude[ii][0]; int k = exclude[ii][1]; KERNEL_LOAD_1X_SCALES - KERNEL_INIT_EXCLUDE_PARAMS_I_AND_K - constexpr bool incl = true; KERNEL_SCALED_PAIRWISE_INTERACTION - KERNEL_SAVE_LOCAL_FORCE } - KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL } ''' @@ -91,20 +90,16 @@ const int nwarp = blockDim.x * gridDim.x / WARP_SIZE; const int ilane = threadIdx.x & (WARP_SIZE - 1); - DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K - for (int iw = iwarp; iw < nakpl; iw += nwarp) { KERNEL_ZERO_LOCAL_FORCE - int tri, tx, ty; tri = iakpl[iw]; tri_to_xy(tri, tx, ty); - int iid = ty * WARP_SIZE + ilane; int atomi = min(iid, n - 1); int i = sorted[atomi].unsorted; @@ -114,7 +109,6 @@ KERNEL_INIT_PARAMS_I_AND_K KERNEL_SYNCWARP - KERNEL_LOAD_INFO_VARIABLES for (int j = 0; j < WARP_SIZE; ++j) { int srclane = (ilane + j) & (WARP_SIZE - 1); \ @@ -124,16 +118,13 @@ KERNEL_SCALE_1 \ KERNEL_FULL_PAIRWISE_INTERACTION - iid = __shfl_sync(ALL_LANES, iid, ilane + 1); KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I } - KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP } - KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL } ''' @@ -159,15 +150,12 @@ const int nwarp = blockDim.x * gridDim.x / WARP_SIZE; const int ilane = threadIdx.x & (WARP_SIZE - 1); - DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K - for (int iw = iwarp; iw < niak; iw += nwarp) { KERNEL_ZERO_LOCAL_FORCE - int ty = iak[iw]; int atomi = ty * WARP_SIZE + ilane; int i = sorted[atomi].unsorted; @@ -176,22 +164,18 @@ KERNEL_INIT_PARAMS_I_AND_K KERNEL_SYNCWARP - for (int j = 0; j < WARP_SIZE; ++j) { KERNEL_KLANE2 \ bool incl = atomk > 0; \ KERNEL_SCALE_1 \ KERNEL_FULL_PAIRWISE_INTERACTION - KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I } - KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP } - KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL } ''' @@ -220,42 +204,33 @@ const int nwarp = blockDim.x * gridDim.x / WARP_SIZE; const int ilane = threadIdx.x & (WARP_SIZE - 1); - DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K - KERNEL_HAS_1X_SCALE for (int ii = ithread; ii < nexclude; ii += blockDim.x * gridDim.x) { KERNEL_SCALED_KLANE KERNEL_ZERO_LOCAL_FORCE - int i = exclude[ii][0]; int k = exclude[ii][1]; KERNEL_LOAD_1X_SCALES - KERNEL_INIT_EXCLUDE_PARAMS_I_AND_K - constexpr bool incl = true; KERNEL_SCALED_PAIRWISE_INTERACTION - KERNEL_SAVE_LOCAL_FORCE } // */ - for (int iw = iwarp; iw < nakpl; iw += nwarp) { KERNEL_ZERO_LOCAL_FORCE - int tri, tx, ty; tri = iakpl[iw]; tri_to_xy(tri, tx, ty); - int iid = ty * WARP_SIZE + ilane; int atomi = min(iid, n - 1); int i = sorted[atomi].unsorted; @@ -265,7 +240,6 @@ KERNEL_INIT_PARAMS_I_AND_K KERNEL_SYNCWARP - KERNEL_LOAD_INFO_VARIABLES for (int j = 0; j < WARP_SIZE; ++j) { int srclane = (ilane + j) & (WARP_SIZE - 1); \ @@ -275,20 +249,16 @@ KERNEL_SCALE_1 \ KERNEL_FULL_PAIRWISE_INTERACTION - iid = __shfl_sync(ALL_LANES, iid, ilane + 1); KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I } - KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP } - for (int iw = iwarp; iw < niak; iw += nwarp) { KERNEL_ZERO_LOCAL_FORCE - int ty = iak[iw]; int atomi = ty * WARP_SIZE + ilane; int i = sorted[atomi].unsorted; @@ -297,22 +267,18 @@ KERNEL_INIT_PARAMS_I_AND_K KERNEL_SYNCWARP - for (int j = 0; j < WARP_SIZE; ++j) { KERNEL_KLANE2 \ bool incl = atomk > 0; \ KERNEL_SCALE_1 \ KERNEL_FULL_PAIRWISE_INTERACTION - KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I } - KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP } - KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL @@ -570,7 +536,7 @@ def _load_scale_param(ptype:str, stem:str, input:str, separate_scaled_pairwise:b v = '' for i in range(1,len(ss)): idx = ss[i] - al = alphabets[idx] + al = rc_alphabets[idx] if input is None: if not separate_scaled_pairwise: v = v + '{} {}{} = 1;'.format(t, stem, al) @@ -610,6 +576,8 @@ def _kv(self, k:str): return self.config[k] else: return '' + + def cudaReplaceDict(self) -> dict: d = {} config = self.config diff --git a/src/cu/amoeba/dfield_cu1.cc b/src/cu/amoeba/dfield_cu1.cc index 2a2fd3fad..e6ffdca35 100644 --- a/src/cu/amoeba/dfield_cu1.cc +++ b/src/cu/amoeba/dfield_cu1.cc @@ -1,4 +1,4 @@ -// ck.py Version 3.0.0 +// ck.py Version 3.0.2 template <class ETYP> __global__ void dfield_cu1(int n, TINKER_IMAGE_PARAMS, real off, const unsigned* restrict dpinfo, int nexclude, diff --git a/src/cu/amoeba/emplar.cu b/src/cu/amoeba/emplar.cu index 9d07c2ec5..7c919d6f9 100644 --- a/src/cu/amoeba/emplar.cu +++ b/src/cu/amoeba/emplar.cu @@ -1,10 +1,10 @@ #include "ff/amoeba/empole.h" #include "ff/amoeba/epolar.h" #include "ff/amoeba/induce.h" -#include "ff/modamoeba.h" #include "ff/cumodamoeba.h" #include "ff/elec.h" #include "ff/image.h" +#include "ff/modamoeba.h" #include "ff/pme.h" #include "ff/spatial.h" #include "ff/switch.h" @@ -76,13 +76,11 @@ void rotG2QIMat_v2(const real (&restrict r)[3][3], // template <class Ver, class ETYP> __device__ -void pairMplar( // - real r2, real3 dR, real mscale, real dscale, real pscale, real uscale, // - real ci, real3 Id, real Iqxx, real Iqxy, real Iqxz, real Iqyy, real Iqyz, real Iqzz, real3 Iud, real3 Iup, real pdi, - real pti, // - real ck, real3 Kd, real Kqxx, real Kqxy, real Kqxz, real Kqyy, real Kqyz, real Kqzz, real3 Kud, real3 Kup, real pdk, - real ptk, // - real f, real aewald, // +void pairMplar(real r2, real3 dR, real mscale, real dscale, real pscale, real uscale, // + real ci, real3 Id, real Iqxx, real Iqxy, real Iqxz, real Iqyy, real Iqyz, real Iqzz, // + real3 Iud, real3 Iup, real pdi, real pti, // + real ck, real3 Kd, real Kqxx, real Kqxy, real Kqxz, real Kqyy, real Kqyz, real Kqzz, // + real3 Kud, real3 Kup, real pdk, real ptk, real f, real aewald, // real& restrict frcxi, real& restrict frcyi, real& restrict frczi, real& restrict frcxk, real& restrict frcyk, real& restrict frczk, real& restrict trqxi, real& restrict trqyi, real& restrict trqzi, real& restrict trqxk, real& restrict trqyk, real& restrict trqzk, real& restrict eo, real& restrict voxx, real& restrict voxy, @@ -92,74 +90,87 @@ void pairMplar( // constexpr bool do_g = Ver::g; constexpr bool do_v = Ver::v; + if CONSTEXPR (eq<ETYP, EWALD>()) { + mscale = 1; + dscale = 0.5f; + pscale = 0.5f; + uscale = 0.5f; + } else { + dscale *= 0.5f; + pscale *= 0.5f; + uscale *= 0.5f; + } + // a rotation matrix that rotates (xr,yr,zr) to (0,0,r); R G = Q real rot[3][3]; real bn[6]; real sr3, sr5, sr7, sr9; real r = REAL_SQRT(r2); real invr1 = REAL_RECIP(r); - { - real rr2 = invr1 * invr1; - real rr1 = invr1; - real rr3 = rr1 * rr2; - real rr5 = 3 * rr3 * rr2; - real rr7 = 5 * rr5 * rr2; - real rr9 = 7 * rr7 * rr2; - real rr11; - if CONSTEXPR (do_g) { - rr11 = 9 * rr9 * rr2; - } + real rr2 = invr1 * invr1; + real rr1 = invr1; + real rr3 = rr1 * rr2; + real rr5 = 3 * rr3 * rr2; + real rr7 = 5 * rr5 * rr2; + real rr9 = 7 * rr7 * rr2; + real rr11; + if CONSTEXPR (do_g) { + rr11 = 9 * rr9 * rr2; + } - if CONSTEXPR (eq<ETYP, EWALD>()) { - if CONSTEXPR (!do_g) { - damp_ewald<5>(bn, r, invr1, rr2, aewald); - } else { - damp_ewald<6>(bn, r, invr1, rr2, aewald); - } - } else if CONSTEXPR (eq<ETYP, NON_EWALD>()) { - bn[0] = rr1; - bn[1] = rr3; - bn[2] = rr5; - bn[3] = rr7; - bn[4] = rr9; - if CONSTEXPR (do_g) { - bn[5] = rr11; - } + if CONSTEXPR (eq<ETYP, EWALD>()) { + if CONSTEXPR (!do_g) { + damp_ewald<5>(bn, r, invr1, rr2, aewald); + } else { + damp_ewald<6>(bn, r, invr1, rr2, aewald); } + } else if CONSTEXPR (eq<ETYP, NON_EWALD>()) { + bn[0] = rr1; + bn[1] = rr3; + bn[2] = rr5; + bn[3] = rr7; + bn[4] = rr9; + if CONSTEXPR (do_g) { + bn[5] = rr11; + } + } - // if use_thole - real ex3, ex5, ex7, ex9; - damp_thole4(r, pdi, pti, pdk, ptk, ex3, ex5, ex7, ex9); - sr3 = bn[1] - ex3 * rr3; - sr5 = bn[2] - ex5 * rr5; - sr7 = bn[3] - ex7 * rr7; - sr9 = bn[4] - ex9 * rr9; - // end if use_thole - - real3 rotz = invr1 * dR; - // pick a random vector as rotx; rotx and rotz cannot be parallel - real3 rotx = rotz; - if (dR.y != 0 || dR.z != 0) - rotx.x += 1; - else - rotx.y += 1; - // Gram–Schmidt process for rotx with respect to rotz - rotx -= dot3(rotx, rotz) * rotz; - // normalize rotx - real invxlen = REAL_RSQRT(dot3(rotx, rotx)); - rotx = invxlen * rotx; - real3 roty = cross(rotz, rotx); - rot[0][0] = rotx.x; - rot[0][1] = rotx.y; - rot[0][2] = rotx.z; - rot[1][0] = roty.x; - rot[1][1] = roty.y; - rot[1][2] = roty.z; - rot[2][0] = rotz.x; - rot[2][1] = rotz.y; - rot[2][2] = rotz.z; + // if use_thole + real ex3, ex5, ex7, ex9; + damp_thole4(r, pdi, pti, pdk, ptk, ex3, ex5, ex7, ex9); + sr3 = bn[1] - ex3 * rr3; + sr5 = bn[2] - ex5 * rr5; + sr7 = bn[3] - ex7 * rr7; + sr9 = bn[4] - ex9 * rr9; + // end if use_thole + + for (int i = 0; i < 6; ++i) { + bn[i] *= mscale; } + real3 rotz = invr1 * dR; + // pick a random vector as rotx; rotx and rotz cannot be parallel + real3 rotx = rotz; + if (dR.y != 0 || dR.z != 0) + rotx.x += 1; + else + rotx.y += 1; + // Gram–Schmidt process for rotx with respect to rotz + rotx -= dot3(rotx, rotz) * rotz; + // normalize rotx + real invxlen = REAL_RSQRT(dot3(rotx, rotx)); + rotx = invxlen * rotx; + real3 roty = cross(rotz, rotx); + rot[0][0] = rotx.x; + rot[0][1] = rotx.y; + rot[0][2] = rotx.z; + rot[1][0] = roty.x; + rot[1][1] = roty.y; + rot[1][2] = roty.z; + rot[2][0] = rotz.x; + rot[2][1] = rotz.y; + rot[2][2] = rotz.z; + real3 di, dk; rotG2QIVector(rot, Id, di); rotG2QIVector(rot, Kd, dk); @@ -180,17 +191,6 @@ void pairMplar( // real phi2[10] = {0}; real phi1z[10] = {0}; - if CONSTEXPR (eq<ETYP, EWALD>()) { - mscale = 1; - dscale = 0.5f; - pscale = 0.5f; - uscale = 0.5f; - } else { - dscale *= 0.5f; - pscale *= 0.5f; - uscale *= 0.5f; - } - // C-C { real coef1 = bn[0]; @@ -238,7 +238,6 @@ void pairMplar( // // Q-C and C-Q { - real coef3 = bn[1]; real coef5 = bn[2] * r2; real coez5 = bn[2] * r; real coez7 = bn[3] * r2 * r; @@ -247,17 +246,14 @@ void pairMplar( // phi2[0] += coef5 * qizz; phi1z[0] += -(2 * coez5 - coez7) * qkzz; // d2phi_c q - phi1[4] += -coef3 * ck; - phi1[5] += -coef3 * ck; - phi1[6] += -(coef3 - coef5) * ck; + // phi1[4]; phi1[5]; + phi1[6] += coef5 * ck; // phi1[7]; phi1[8]; phi1[9]; - phi2[4] += -coef3 * ci; - phi2[5] += -coef3 * ci; - phi2[6] += -(coef3 - coef5) * ci; + // phi2[4]; phi2[5]; + phi2[6] += coef5 * ci; // phi2[7]; phi2[8]; phi2[9]; - phi1z[4] += -coez5 * ck; - phi1z[5] += -coez5 * ck; - phi1z[6] += -(3 * coez5 - coez7) * ck; + // phi1z[4]; phi1z[5]; + phi1z[6] += -(2 * coez5 - coez7) * ck; // phi1z[7]; phi1z[8]; phi1z[9]; } @@ -278,23 +274,20 @@ void pairMplar( // phi1z[2] += 2 * coez7 * qkyz; phi1z[3] += (2 * coez7 - coez9) * qkzz; // d2phi_d q - phi1[4] += coef5 * dk.z; - phi1[5] += coef5 * dk.z; - phi1[6] += (3 * coef5 - coef7) * dk.z; + // phi1[4]; phi1[5]; + phi1[6] += (2 * coef5 - coef7) * dk.z; // phi1[7]; phi1[8] += 2 * coef5 * dk.x; phi1[9] += 2 * coef5 * dk.y; // - phi2[4] += -coef5 * di.z; - phi2[5] += -coef5 * di.z; - phi2[6] += -(3 * coef5 - coef7) * di.z; + // phi2[4]; phi2[5]; + phi2[6] += -(2 * coef5 - coef7) * di.z; // phi2[7]; phi2[8] += -2 * coef5 * di.x; phi2[9] += -2 * coef5 * di.y; // - phi1z[4] += -coez7 * dk.z; - phi1z[5] += -coez7 * dk.z; - phi1z[6] += -(3 * coez7 - coez9) * dk.z; + // phi1z[4]; phi1z[5]; + phi1z[6] += -(2 * coez7 - coez9) * dk.z; // phi1z[7]; phi1z[8] += -2 * coez7 * dk.x; phi1z[9] += -2 * coez7 * dk.y; @@ -310,35 +303,28 @@ void pairMplar( // real coez9 = bn[4] * r2 * r; real coez11 = bn[5] * r2 * r2 * r; // - phi1[4] += 2 * coef5 * qkxx - coef7 * qkzz; - phi1[5] += 2 * coef5 * qkyy - coef7 * qkzz; - phi1[6] += (2 * coef5 - 5 * coef7 + coef9) * qkzz; + phi1[4] += 2 * coef5 * qkxx; + phi1[5] += 2 * coef5 * qkyy; + phi1[6] += (2 * coef5 - 4 * coef7 + coef9) * qkzz; phi1[7] += 4 * coef5 * qkxy; phi1[8] += 4 * (coef5 - coef7) * qkxz; phi1[9] += 4 * (coef5 - coef7) * qkyz; // - phi2[4] += 2 * coef5 * qixx - coef7 * qizz; - phi2[5] += 2 * coef5 * qiyy - coef7 * qizz; - phi2[6] += (2 * coef5 - 5 * coef7 + coef9) * qizz; + phi2[4] += 2 * coef5 * qixx; + phi2[5] += 2 * coef5 * qiyy; + phi2[6] += (2 * coef5 - 4 * coef7 + coef9) * qizz; phi2[7] += 4 * coef5 * qixy; phi2[8] += 4 * (coef5 - coef7) * qixz; phi2[9] += 4 * (coef5 - coef7) * qiyz; // - phi1z[4] += 2 * coez7 * qkxx + (2 * coez7 - coez9) * qkzz; - phi1z[5] += 2 * coez7 * qkyy + (2 * coez7 - coez9) * qkzz; - phi1z[6] += (12 * coez7 - 9 * coez9 + coez11) * qkzz; + phi1z[4] += 2 * coez7 * qkxx; + phi1z[5] += 2 * coez7 * qkyy; + phi1z[6] += (10 * coez7 - 8 * coez9 + coez11) * qkzz; phi1z[7] += 4 * coez7 * qkxy; phi1z[8] += 4 * (3 * coez7 - coez9) * qkxz; phi1z[9] += 4 * (3 * coez7 - coez9) * qkyz; } - #pragma unroll - for (int i = 0; i < 10; ++i) { - phi1[i] *= mscale; - phi2[i] *= mscale; - phi1z[i] *= mscale; - } - if CONSTEXPR (do_e) { real e = phi1[0] * ci + phi1[1] * di.x + phi1[2] * di.y + phi1[3] * di.z + phi1[4] * qixx + phi1[5] * qiyy + phi1[6] * qizz + phi1[7] * qixy + phi1[8] * qixz + phi1[9] * qiyz; @@ -418,23 +404,20 @@ void pairMplar( // real coepz7 = pscale * coez7; real coepz9 = pscale * coez9; // d2phi_u q - phi1[4] += coed5 * ukp.z + coep5 * ukd.z; - phi1[5] += coed5 * ukp.z + coep5 * ukd.z; - phi1[6] += (3 * coed5 - coed7) * ukp.z + (3 * coep5 - coep7) * ukd.z; + // phi1[4]; phi1[5]; + phi1[6] += (2 * coed5 - coed7) * ukp.z + (2 * coep5 - coep7) * ukd.z; // phi1[7]; phi1[8] += 2 * (coed5 * ukp.x + coep5 * ukd.x); phi1[9] += 2 * (coed5 * ukp.y + coep5 * ukd.y); // - phi2[4] += -(coed5 * uip.z + coep5 * uid.z); - phi2[5] += -(coed5 * uip.z + coep5 * uid.z); - phi2[6] += -(3 * coed5 - coed7) * uip.z - (3 * coep5 - coep7) * uid.z; + // phi2[4]; phi2[5]; + phi2[6] += -(2 * coed5 - coed7) * uip.z - (2 * coep5 - coep7) * uid.z; // phi2[7]; phi2[8] += -2 * (coed5 * uip.x + coep5 * uid.x); phi2[9] += -2 * (coed5 * uip.y + coep5 * uid.y); // - phi1z[4] += -(coedz7 * ukp.z + coepz7 * ukd.z); - phi1z[5] += -(coedz7 * ukp.z + coepz7 * ukd.z); - phi1z[6] += -(3 * coedz7 - coedz9) * ukp.z - (3 * coepz7 - coepz9) * ukd.z; + // phi1z[4]; phi1z[5]; + phi1z[6] += -(2 * coedz7 - coedz9) * ukp.z - (2 * coepz7 - coepz9) * ukd.z; // phi1z[7]; phi1z[8] += -2 * (coedz7 * ukp.x + coepz7 * ukd.x); phi1z[9] += -2 * (coedz7 * ukp.y + coepz7 * ukd.y); diff --git a/src/cu/amoeba/emplar_cu1.cc b/src/cu/amoeba/emplar_cu1.cc index 800f5a717..d0637ff71 100644 --- a/src/cu/amoeba/emplar_cu1.cc +++ b/src/cu/amoeba/emplar_cu1.cc @@ -1,4 +1,4 @@ -// ck.py Version 3.0.0 +// ck.py Version 3.0.2 template <class Ver, class ETYP> __global__ void emplar_cu1c(TINKER_IMAGE_PARAMS, EnergyBuffer restrict ebuf, VirialBuffer restrict vbuf, grad_prec* restrict gx, diff --git a/src/cu/amoeba/empole_cu1.cc b/src/cu/amoeba/empole_cu1.cc index 7ae0a0e23..7876775ca 100644 --- a/src/cu/amoeba/empole_cu1.cc +++ b/src/cu/amoeba/empole_cu1.cc @@ -1,4 +1,4 @@ -// ck.py Version 3.0.0 +// ck.py Version 3.0.2 template <class Ver, class ETYP> __global__ void empole_cu1(int n, TINKER_IMAGE_PARAMS, CountBuffer restrict nem, EnergyBuffer restrict em, diff --git a/src/cu/amoeba/epolar_cu1.cc b/src/cu/amoeba/epolar_cu1.cc index bcd982cf0..d7a3b8b7a 100644 --- a/src/cu/amoeba/epolar_cu1.cc +++ b/src/cu/amoeba/epolar_cu1.cc @@ -1,4 +1,4 @@ -// ck.py Version 3.0.0 +// ck.py Version 3.0.2 template <class Ver, class ETYP> __global__ void epolar_cu1(int n, TINKER_IMAGE_PARAMS, CountBuffer restrict nep, EnergyBuffer restrict ep, diff --git a/src/cu/amoeba/sparsePrecond_cu1.cc b/src/cu/amoeba/sparsePrecond_cu1.cc index 92376155b..de44dc1b5 100644 --- a/src/cu/amoeba/sparsePrecond_cu1.cc +++ b/src/cu/amoeba/sparsePrecond_cu1.cc @@ -1,4 +1,4 @@ -// ck.py Version 3.0.0 +// ck.py Version 3.0.2 __global__ void sparsePrecond_cu1(int n, TINKER_IMAGE_PARAMS, real off, const unsigned* restrict uinfo, int nexclude, const int (*restrict exclude)[2], const real* restrict exclude_scale, const real* restrict x, const real* restrict y, diff --git a/src/cu/amoeba/ufield_cu1.cc b/src/cu/amoeba/ufield_cu1.cc index 96d4c6ff1..462f84511 100644 --- a/src/cu/amoeba/ufield_cu1.cc +++ b/src/cu/amoeba/ufield_cu1.cc @@ -1,4 +1,4 @@ -// ck.py Version 3.0.0 +// ck.py Version 3.0.2 template <class ETYP> __global__ void ufield_cu1(int n, TINKER_IMAGE_PARAMS, real off, const unsigned* restrict uinfo, int nexclude, diff --git a/src/cu/echarge_cu1.cc b/src/cu/echarge_cu1.cc index c3fa8ef6a..db42dba4c 100644 --- a/src/cu/echarge_cu1.cc +++ b/src/cu/echarge_cu1.cc @@ -1,4 +1,4 @@ -// ck.py Version 3.0.1 +// ck.py Version 3.0.2 template <class Ver, class ETYP> __global__ void echarge_cu1(int n, TINKER_IMAGE_PARAMS, CountBuffer restrict nec, EnergyBuffer restrict ec, diff --git a/src/cu/ehal_cu1.cc b/src/cu/ehal_cu1.cc index 9422c4dd6..f929eab12 100644 --- a/src/cu/ehal_cu1.cc +++ b/src/cu/ehal_cu1.cc @@ -1,4 +1,4 @@ -// ck.py Version 3.0.1 +// ck.py Version 3.0.2 template <class Ver> __global__ void ehal_cu1(int n, TINKER_IMAGE_PARAMS, CountBuffer restrict nev, EnergyBuffer restrict ev, VirialBuffer restrict vev, diff --git a/src/cu/elj_cu1.cc b/src/cu/elj_cu1.cc index 6062517f7..690f6d6b1 100644 --- a/src/cu/elj_cu1.cc +++ b/src/cu/elj_cu1.cc @@ -1,4 +1,4 @@ -// ck.py Version 3.0.1 +// ck.py Version 3.0.2 template <class Ver> __global__ void elj_cu1(int n, TINKER_IMAGE_PARAMS, CountBuffer restrict nev, EnergyBuffer restrict ev, VirialBuffer restrict vev, diff --git a/src/cu/hippo/dfieldChgpen_cu1.cc b/src/cu/hippo/dfieldChgpen_cu1.cc index 2b88291ce..0818fcfd6 100644 --- a/src/cu/hippo/dfieldChgpen_cu1.cc +++ b/src/cu/hippo/dfieldChgpen_cu1.cc @@ -1,4 +1,4 @@ -// ck.py Version 3.0.1 +// ck.py Version 3.0.2 template <class ETYP> __global__ void dfieldChgpen_cu1(int n, TINKER_IMAGE_PARAMS, real off, const unsigned* restrict dinfo, int nexclude, diff --git a/src/cu/hippo/sparsePrecond_cu4.cc b/src/cu/hippo/sparsePrecond_cu4.cc index 47928ced1..c56d8b748 100644 --- a/src/cu/hippo/sparsePrecond_cu4.cc +++ b/src/cu/hippo/sparsePrecond_cu4.cc @@ -1,4 +1,4 @@ -// ck.py Version 3.0.1 +// ck.py Version 3.0.2 __global__ void sparsePrecond_cu4(int n, TINKER_IMAGE_PARAMS, real off, const unsigned* restrict winfo, int nexclude, const int (*restrict exclude)[2], const real* restrict exclude_scale, const real* restrict x, const real* restrict y, diff --git a/src/cu/hippo/sparsePrecond_cu6.cc b/src/cu/hippo/sparsePrecond_cu6.cc index 35d4b2173..806312d4a 100644 --- a/src/cu/hippo/sparsePrecond_cu6.cc +++ b/src/cu/hippo/sparsePrecond_cu6.cc @@ -1,4 +1,4 @@ -// ck.py Version 3.0.1 +// ck.py Version 3.0.2 __global__ void sparsePrecond_cu6(int n, TINKER_IMAGE_PARAMS, real off, const unsigned* restrict uinfo, int nexclude, const int (*restrict exclude)[2], const real* restrict exclude_scale, const real* restrict x, const real* restrict y, diff --git a/src/cu/hippo/ufieldChgpen_cu1.cc b/src/cu/hippo/ufieldChgpen_cu1.cc index cdd131354..eafddc639 100644 --- a/src/cu/hippo/ufieldChgpen_cu1.cc +++ b/src/cu/hippo/ufieldChgpen_cu1.cc @@ -1,4 +1,4 @@ -// ck.py Version 3.0.1 +// ck.py Version 3.0.2 template <class ETYP> __global__ void ufieldChgpen_cu1(int n, TINKER_IMAGE_PARAMS, real off, const unsigned* restrict winfo, int nexclude,