Skip to content

Commit

Permalink
simplify amoeba mpole+polar
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhi-wang committed Oct 20, 2022
1 parent 0b94d62 commit 9b00ec0
Show file tree
Hide file tree
Showing 15 changed files with 127 additions and 176 deletions.
54 changes: 11 additions & 43 deletions ext/ext/ck3.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
#!/usr/bin/env python3

'''
python3 this_script.py -c y3/config.yaml
or
python3 this_script.py y3/config.yaml
'''

import os
import re
Expand All @@ -14,15 +21,14 @@
########################################################################


alphabets = {
# Lookup table from a decimal index written as a string ('0' .. '25') to the
# lowercase letter at that position in the alphabet ('a' .. 'z').
# Keys are strings, not ints, and are inserted in ascending numeric order.
rc_alphabets = {str(n): chr(ord('a') + n) for n in range(26)}


rc_kernel2c = '''
TEMPLATE_PARAMS \
__global__ \
Expand All @@ -40,31 +46,24 @@
USING_DEVICE_VARIABLES KERNEL_CONSTEXPR_FLAGS \
const int ithread = threadIdx.x + blockIdx.x * blockDim.x;
DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL
DECLARE_FORCE_I_AND_K DECLARE_PARAMS_I_AND_K
for (int ii = ithread; ii < nexclude; ii += blockDim.x * gridDim.x) {
KERNEL_SCALED_KLANE KERNEL_ZERO_LOCAL_FORCE
int i = exclude[ii][0];
int k = exclude[ii][1];
KERNEL_LOAD_1X_SCALES
KERNEL_INIT_EXCLUDE_PARAMS_I_AND_K
constexpr bool incl = true;
KERNEL_SCALED_PAIRWISE_INTERACTION
KERNEL_SAVE_LOCAL_FORCE
}
KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL
}
'''
Expand All @@ -91,20 +90,16 @@
const int nwarp = blockDim.x * gridDim.x / WARP_SIZE;
const int ilane = threadIdx.x & (WARP_SIZE - 1);
DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL
DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K
for (int iw = iwarp; iw < nakpl; iw += nwarp) {
KERNEL_ZERO_LOCAL_FORCE
int tri, tx, ty;
tri = iakpl[iw];
tri_to_xy(tri, tx, ty);
int iid = ty * WARP_SIZE + ilane;
int atomi = min(iid, n - 1);
int i = sorted[atomi].unsorted;
Expand All @@ -114,7 +109,6 @@
KERNEL_INIT_PARAMS_I_AND_K
KERNEL_SYNCWARP
KERNEL_LOAD_INFO_VARIABLES
for (int j = 0; j < WARP_SIZE; ++j) {
int srclane = (ilane + j) & (WARP_SIZE - 1); \
Expand All @@ -124,16 +118,13 @@
KERNEL_SCALE_1 \
KERNEL_FULL_PAIRWISE_INTERACTION
iid = __shfl_sync(ALL_LANES, iid, ilane + 1);
KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I
}
KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP
}
KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL
}
'''
Expand All @@ -159,15 +150,12 @@
const int nwarp = blockDim.x * gridDim.x / WARP_SIZE;
const int ilane = threadIdx.x & (WARP_SIZE - 1);
DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL
DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K
for (int iw = iwarp; iw < niak; iw += nwarp) {
KERNEL_ZERO_LOCAL_FORCE
int ty = iak[iw];
int atomi = ty * WARP_SIZE + ilane;
int i = sorted[atomi].unsorted;
Expand All @@ -176,22 +164,18 @@
KERNEL_INIT_PARAMS_I_AND_K
KERNEL_SYNCWARP
for (int j = 0; j < WARP_SIZE; ++j) {
KERNEL_KLANE2 \
bool incl = atomk > 0; \
KERNEL_SCALE_1 \
KERNEL_FULL_PAIRWISE_INTERACTION
KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I
}
KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP
}
KERNEL_SUM_COUNT KERNEL_SUM_ENERGY KERNEL_SUM_VIRIAL
}
'''
Expand Down Expand Up @@ -220,42 +204,33 @@
const int nwarp = blockDim.x * gridDim.x / WARP_SIZE;
const int ilane = threadIdx.x & (WARP_SIZE - 1);
DECLARE_ZERO_LOCAL_COUNT DECLARE_ZERO_LOCAL_ENERGY DECLARE_ZERO_LOCAL_VIRIAL
DECLARE_PARAMS_I_AND_K DECLARE_FORCE_I_AND_K
KERNEL_HAS_1X_SCALE
for (int ii = ithread; ii < nexclude; ii += blockDim.x * gridDim.x) {
KERNEL_SCALED_KLANE KERNEL_ZERO_LOCAL_FORCE
int i = exclude[ii][0];
int k = exclude[ii][1];
KERNEL_LOAD_1X_SCALES
KERNEL_INIT_EXCLUDE_PARAMS_I_AND_K
constexpr bool incl = true;
KERNEL_SCALED_PAIRWISE_INTERACTION
KERNEL_SAVE_LOCAL_FORCE
}
// */
for (int iw = iwarp; iw < nakpl; iw += nwarp) {
KERNEL_ZERO_LOCAL_FORCE
int tri, tx, ty;
tri = iakpl[iw];
tri_to_xy(tri, tx, ty);
int iid = ty * WARP_SIZE + ilane;
int atomi = min(iid, n - 1);
int i = sorted[atomi].unsorted;
Expand All @@ -265,7 +240,6 @@
KERNEL_INIT_PARAMS_I_AND_K
KERNEL_SYNCWARP
KERNEL_LOAD_INFO_VARIABLES
for (int j = 0; j < WARP_SIZE; ++j) {
int srclane = (ilane + j) & (WARP_SIZE - 1); \
Expand All @@ -275,20 +249,16 @@
KERNEL_SCALE_1 \
KERNEL_FULL_PAIRWISE_INTERACTION
iid = __shfl_sync(ALL_LANES, iid, ilane + 1);
KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I
}
KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP
}
for (int iw = iwarp; iw < niak; iw += nwarp) {
KERNEL_ZERO_LOCAL_FORCE
int ty = iak[iw];
int atomi = ty * WARP_SIZE + ilane;
int i = sorted[atomi].unsorted;
Expand All @@ -297,22 +267,18 @@
KERNEL_INIT_PARAMS_I_AND_K
KERNEL_SYNCWARP
for (int j = 0; j < WARP_SIZE; ++j) {
KERNEL_KLANE2 \
bool incl = atomk > 0; \
KERNEL_SCALE_1 \
KERNEL_FULL_PAIRWISE_INTERACTION
KERNEL_SHUFFLE_PARAMS_I KERNEL_SHUFFLE_LOCAL_FORCE_I
}
KERNEL_SAVE_LOCAL_FORCE KERNEL_SYNCWARP
}
KERNEL_SUM_COUNT
KERNEL_SUM_ENERGY
KERNEL_SUM_VIRIAL
Expand Down Expand Up @@ -570,7 +536,7 @@ def _load_scale_param(ptype:str, stem:str, input:str, separate_scaled_pairwise:b
v = ''
for i in range(1,len(ss)):
idx = ss[i]
al = alphabets[idx]
al = rc_alphabets[idx]
if input is None:
if not separate_scaled_pairwise:
v = v + '{} {}{} = 1;'.format(t, stem, al)
Expand Down Expand Up @@ -610,6 +576,8 @@ def _kv(self, k:str):
return self.config[k]
else:
return ''


def cudaReplaceDict(self) -> dict:
d = {}
config = self.config
Expand Down
2 changes: 1 addition & 1 deletion src/cu/amoeba/dfield_cu1.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// ck.py Version 3.0.0
// ck.py Version 3.0.2
template <class ETYP>
__global__
void dfield_cu1(int n, TINKER_IMAGE_PARAMS, real off, const unsigned* restrict dpinfo, int nexclude,
Expand Down
Loading

0 comments on commit 9b00ec0

Please sign in to comment.