-
Notifications
You must be signed in to change notification settings - Fork 43
[WIP] Binary Operators #94
base: master
Are you sure you want to change the base?
Changes from 15 commits
b92f977
40577cb
916b695
cf20eef
144aa49
8b228bb
3717b4f
a24d15f
2e384e6
a11dc77
0097521
88d3c45
280f52a
fc9efab
c397a7e
1ac172d
335485e
d112885
6145569
d311a81
74c0b68
d0e4dfe
891e2d9
3a18c62
ba15ada
191e549
6061920
5048f24
35f64e5
6667ade
34579a4
55633c9
3c359c0
e427005
e00479b
42d24f4
ccc3bc9
52a7aa4
f08a771
5313068
b46cd2a
b1f5ae2
82e99c8
6509c76
c093441
214236c
1a9d550
29e81ea
65d207e
f3e7768
12a4f5c
6073b03
47b0905
9ba4b73
6f6d804
96012d9
5773a58
55c5fd2
10dff5d
729cbfa
a3cbde8
59b5ba6
36241ec
1c521a1
b6f5cce
a166c7c
3a74ff8
f7c3d4f
cc75849
2c356d0
d0a76c1
dfa27b8
e47aebe
9722f0e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,3 +4,6 @@ | |
[submodule "thirdparty/moderngpu"] | ||
path = thirdparty/moderngpu | ||
url = https://github.com/moderngpu/moderngpu.git | ||
[submodule "thirdparty/jitify"] | ||
path = thirdparty/jitify | ||
url = [email protected]:NVIDIA/jitify.git |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,10 @@ typedef enum { | |
GDF_INT16, | ||
GDF_INT32, | ||
GDF_INT64, | ||
GDF_UINT8, | ||
GDF_UINT16, | ||
GDF_UINT32, | ||
GDF_UINT64, | ||
GDF_FLOAT32, | ||
GDF_FLOAT64, | ||
GDF_DATE32, // int32_t days since the UNIX epoch | ||
|
@@ -16,6 +20,23 @@ typedef enum { | |
N_GDF_TYPES, /* additional types should go BEFORE N_GDF_TYPES */ | ||
} gdf_dtype; | ||
|
||
union gdf_data { | ||
void* invd; | ||
int8_t si08; | ||
int16_t si16; | ||
int32_t si32; | ||
int64_t si64; | ||
uint8_t ui08; | ||
uint16_t ui16; | ||
uint32_t ui32; | ||
uint64_t ui64; | ||
float fp32; | ||
double fp64; | ||
int32_t dt32; // GDF_DATE32 | ||
int64_t dt64; // GDF_DATE64 | ||
int64_t tmst; // GDF_TIMESTAMP | ||
}; | ||
|
||
typedef enum { | ||
GDF_SUCCESS=0, | ||
GDF_CUDA_ERROR, | ||
|
@@ -47,6 +68,11 @@ typedef struct { | |
// here we can also hold info for decimal datatype or any other datatype that requires additional information | ||
} gdf_dtype_extra_info; | ||
|
||
struct gdf_scalar { | ||
gdf_data data; | ||
gdf_dtype dtype; | ||
}; | ||
|
||
typedef struct gdf_column_{ | ||
void *data; | ||
gdf_valid_type *valid; | ||
|
@@ -71,6 +97,29 @@ typedef enum { | |
N_GDF_AGG_OPS, /* additional aggregation ops should go BEFORE N_GDF_... */ | ||
} gdf_agg_op; | ||
|
||
|
||
enum gdf_binary_operator { | ||
GDF_ADD, | ||
GDF_SUB, | ||
GDF_MUL, | ||
GDF_DIV, | ||
GDF_TRUE_DIV, | ||
GDF_FLOOR_DIV, | ||
GDF_MOD, | ||
GDF_POW, | ||
//GDF_COMBINE, | ||
//GDF_COMBINE_FIRST, | ||
//GDF_ROUND, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think that it makes sense to have intermediate parts of an enum commented out. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can lead to API instabilities. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm so sorry, I hadn't seen your message. |
||
GDF_EQUAL, | ||
GDF_NOT_EQUAL, | ||
GDF_LESS, | ||
GDF_GREATER, | ||
GDF_LESS_EQUAL, | ||
GDF_GREATER_EQUAL, | ||
//GDF_PRODUCT, | ||
//GDF_DOT | ||
}; | ||
|
||
/* additonal flags */ | ||
typedef struct gdf_context_{ | ||
int flag_sorted; /* 0 = No, 1 = yes */ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# cmake -DCMAKE_BUILD_TYPE=Release -DBINARY_OPERATION_VERSION:STRING=V1 ../../code/libgdf | ||
|
||
if (NOT DEFINED BINARY_OPERATION_VERSION) | ||
list(APPEND gdfs_source_files "") | ||
return() | ||
endif() | ||
|
||
|
||
if (${BINARY_OPERATION_VERSION} STREQUAL "V1") | ||
message("BINARY_OPERATION_VERSION: V1 Selected") | ||
list(APPEND gdfs_source_files | ||
"${CMAKE_CURRENT_LIST_DIR}/common/types.cpp" | ||
"${CMAKE_CURRENT_LIST_DIR}/common/mediator.cu" | ||
) | ||
endif() | ||
|
||
|
||
if (${BINARY_OPERATION_VERSION} STREQUAL "V2") | ||
message("BINARY_OPERATION_VERSION: V2 Selected") | ||
list(APPEND gdfs_source_files | ||
"${CMAKE_CURRENT_LIST_DIR}/binary2/binary.cpp" | ||
"${CMAKE_CURRENT_LIST_DIR}/binary2/kernel_gdf_data.cpp" | ||
"${CMAKE_CURRENT_LIST_DIR}/binary2/kernel.cpp" | ||
"${CMAKE_CURRENT_LIST_DIR}/binary2/launcher.cpp" | ||
"${CMAKE_CURRENT_LIST_DIR}/binary2/operation.cpp" | ||
"${CMAKE_CURRENT_LIST_DIR}/binary2/traits.cpp" | ||
"${CMAKE_CURRENT_LIST_DIR}/binary2/type.cpp" | ||
) | ||
endif() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#include "gdf/gdf.h" | ||
#include "binary/binary2/launcher.h" | ||
|
||
namespace gdf { | ||
gdf_error binary_operation(gdf_column* out, gdf_column* vax, gdf_scalar* vay, gdf_binary_operator ope) { | ||
gdf::Launcher::launch().kernel("kernel_v_s") | ||
.instantiate(out, vax, vay, ope) | ||
.launch(out, vax, vay); | ||
|
||
return GDF_SUCCESS; | ||
} | ||
|
||
gdf_error binary_operation(gdf_column* out, gdf_column* vax, gdf_column* vay, gdf_binary_operator ope) { | ||
gdf::Launcher::launch().kernel("kernel_v_v") | ||
.instantiate(out, vax, vay, ope) | ||
.launch(out, vax, vay); | ||
|
||
return GDF_SUCCESS; | ||
} | ||
|
||
gdf_error binary_operation(gdf_column* out, gdf_column* vax, gdf_scalar* vay, gdf_scalar* def, gdf_binary_operator ope) { | ||
gdf::Launcher::launch().kernel("kernel_v_s_d") | ||
.instantiate(out, vax, vay, def, ope) | ||
.launch(out, vax, vay, def); | ||
|
||
return GDF_SUCCESS; | ||
} | ||
|
||
gdf_error binary_operation(gdf_column* out, gdf_column* vax, gdf_column* vay, gdf_scalar* def, gdf_binary_operator ope) { | ||
gdf::Launcher::launch().kernel("kernel_v_v_d") | ||
.instantiate(out, vax, vay, def, ope) | ||
.launch(out, vax, vay, def); | ||
|
||
return GDF_SUCCESS; | ||
} | ||
} | ||
|
||
|
||
gdf_error gdf_binary_operation_v_s_v(gdf_column* out, gdf_scalar* vax, gdf_column* vay, gdf_binary_operator ope) { | ||
return gdf::binary_operation(out, vay, vax, ope); | ||
} | ||
|
||
gdf_error gdf_binary_operation_v_v_s(gdf_column* out, gdf_column* vax, gdf_scalar* vay, gdf_binary_operator ope) { | ||
return gdf::binary_operation(out, vax, vay, ope); | ||
} | ||
|
||
gdf_error gdf_binary_operation_v_v_v(gdf_column* out, gdf_column* vax, gdf_column* vay, gdf_binary_operator ope) { | ||
return gdf::binary_operation(out, vax, vay, ope); | ||
} | ||
|
||
gdf_error gdf_binary_operation_v_s_v_d(gdf_column* out, gdf_scalar* vax, gdf_column* vay, gdf_scalar* def, gdf_binary_operator ope) { | ||
return gdf::binary_operation(out, vay, vax, def, ope); | ||
} | ||
|
||
gdf_error gdf_binary_operation_v_v_s_d(gdf_column* out, gdf_column* vax, gdf_scalar* vay, gdf_scalar* def, gdf_binary_operator ope) { | ||
return gdf::binary_operation(out, vax, vay, def, ope); | ||
} | ||
|
||
gdf_error gdf_binary_operation_v_v_v_d(gdf_column* out, gdf_column* vax, gdf_column* vay, gdf_scalar* def, gdf_binary_operator ope) { | ||
return gdf::binary_operation(out, vax, vay, def, ope); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#ifndef GDF_BINARY_CUDA_H | ||
#define GDF_BINARY_CUDA_H | ||
|
||
namespace gdf { | ||
namespace cuda { | ||
|
||
extern const char* kernel; | ||
extern const char* traits; | ||
extern const char* operation; | ||
extern const char* kernel_gdf_data; | ||
|
||
} | ||
} | ||
|
||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
namespace gdf { | ||
namespace cuda { | ||
|
||
const char* kernel = | ||
R"***( | ||
#include <cstdint> | ||
#include "traits.h" | ||
#include "operation.h" | ||
#include "kernel_gdf_data.h" | ||
|
||
#define WARP_SIZE 32 | ||
#define WARP_MASK 0xFFFFFFFF | ||
|
||
__device__ __forceinline__ | ||
uint32_t isValid(int tid, uint32_t* valid, uint32_t mask) { | ||
return valid[tid / WARP_SIZE] & mask; | ||
} | ||
|
||
__device__ __forceinline__ | ||
void shiftMask(uint32_t& mask) { | ||
#pragma unroll | ||
for (int offset = 16; offset > 0; offset /= 2) { | ||
mask += __shfl_down_sync(WARP_MASK, mask, offset); | ||
} | ||
} | ||
|
||
template <typename TypeOut, typename TypeVax, typename TypeVay, typename TypeOpe> | ||
__global__ | ||
void kernel_v_s(int size, TypeOut* out_data, TypeVax* vax_data, gdf_data vay_data) { | ||
harrism marked this conversation as resolved.
Show resolved
Hide resolved
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Implemented the remaining scalar-vector operations. |
||
int tid = threadIdx.x; | ||
int blkid = blockIdx.x; | ||
int blksz = blockDim.x; | ||
int gridsz = gridDim.x; | ||
|
||
int start = tid + blkid * blksz; | ||
int step = blksz * gridsz; | ||
|
||
for (int i=start; i<size; i+=step) { | ||
AbstractOperation<TypeOpe> operation; | ||
out_data[i] = operation.template operate<TypeOut, TypeVax, TypeVay>(vax_data[i], (TypeVay)vay_data); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to set the output valid bit mask - I don't see it being handled in the code. It should be an OR between the two bit masks of the two input operands. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The output valid bitmask is processed in all kernels. |
||
} | ||
} | ||
|
||
template <typename TypeOut, typename TypeVax, typename TypeVay, typename TypeOpe> | ||
__global__ | ||
void kernel_v_v(int size, TypeOut* out_data, TypeVax* vax_data, TypeVay* vay_data) { | ||
int tid = threadIdx.x; | ||
int blkid = blockIdx.x; | ||
int blksz = blockDim.x; | ||
int gridsz = gridDim.x; | ||
|
||
int start = tid + blkid * blksz; | ||
int step = blksz * gridsz; | ||
|
||
for (int i=start; i<size; i+=step) { | ||
AbstractOperation<TypeOpe> operation; | ||
out_data[i] = operation.template operate<TypeOut, TypeVax, TypeVay>(vax_data[i], vay_data[i]); | ||
} | ||
} | ||
|
||
template <typename TypeOut, typename TypeVax, typename TypeVay, typename TypeDef, typename TypeOpe> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wow, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No problem, it will be changed. |
||
__global__ | ||
void kernel_v_s_d(int size, gdf_data def_data, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should |
||
TypeOut* out_data, TypeVax* vax_data, gdf_data vay_data, | ||
uint32_t* out_valid, uint32_t* vax_valid) { | ||
int tid = threadIdx.x; | ||
int blkid = blockIdx.x; | ||
int blksz = blockDim.x; | ||
int gridsz = gridDim.x; | ||
|
||
int start = tid + blkid * blksz; | ||
int step = blksz * gridsz; | ||
|
||
for (int i=start; i<size; i+=step) { | ||
uint32_t mask = 1 << (i % WARP_SIZE); | ||
uint32_t is_vax_valid = isValid(i, vax_valid, mask); | ||
|
||
TypeVax vax_data_aux = vax_data[i]; | ||
if ((is_vax_valid & mask) != mask) { | ||
vax_data_aux = (TypeDef)def_data; | ||
} | ||
|
||
AbstractOperation<TypeOpe> operation; | ||
out_data[i] = operation.template operate<TypeOut, TypeVax, TypeVay>(vax_data_aux, (TypeVay)vay_data); | ||
|
||
__syncwarp(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the reason for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not correct. It'll be removed. |
||
|
||
shiftMask(mask); | ||
|
||
if ((i % WARP_SIZE) == 0) { | ||
out_valid[i / WARP_SIZE] = mask; | ||
} | ||
} | ||
} | ||
|
||
|
||
template <typename TypeOut, typename TypeVax, typename TypeVay, typename TypeDef, typename TypeOpe> | ||
__global__ | ||
void kernel_v_v_d(int size, gdf_data def_data, | ||
TypeOut* out_data, TypeVax* vax_data, TypeVay* vay_data, | ||
uint32_t* out_valid, uint32_t* vax_valid, uint32_t* vay_valid) { | ||
int tid = threadIdx.x; | ||
int blkid = blockIdx.x; | ||
int blksz = blockDim.x; | ||
int gridsz = gridDim.x; | ||
|
||
int start = tid + blkid * blksz; | ||
int step = blksz * gridsz; | ||
|
||
for (int i=start; i<size; i+=step) { | ||
uint32_t mask = 1 << (i % WARP_SIZE); | ||
uint32_t is_vax_valid = isValid(i, vax_valid, mask); | ||
uint32_t is_vay_valid = isValid(i, vay_valid, mask); | ||
|
||
TypeVax vax_data_aux = vax_data[i]; | ||
TypeVay vay_data_aux = vay_data[i]; | ||
if ((is_vax_valid & mask) != mask) { | ||
vax_data_aux = (TypeDef)def_data; | ||
} | ||
else if ((is_vay_valid & mask) != mask) { | ||
vay_data_aux = (TypeDef)def_data; | ||
} | ||
if ((is_vax_valid | is_vay_valid) == mask) { | ||
AbstractOperation<TypeOpe> operation; | ||
out_data[i] = operation.template operate<TypeOut, TypeVax, TypeVay>(vax_data_aux, vay_data_aux); | ||
} else { | ||
mask = 0; | ||
} | ||
|
||
__syncwarp(); | ||
|
||
shiftMask(mask); | ||
|
||
if ((i % WARP_SIZE) == 0) { | ||
out_valid[i / WARP_SIZE] = mask; | ||
} | ||
} | ||
} | ||
)***"; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are the names in this union required to only be four characters for some reason? It would be nice to spell them out better, especially "invd", "tmst", "dt32" and "dt64".
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There isn't a particular reason. Is there any name style and code style for this project?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Human readable variable names is good practice for every project.