Skip to content
This repository has been archived by the owner on Dec 21, 2018. It is now read-only.

[WIP] Binary Operators #94

Open
wants to merge 74 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
74 commits
Select commit Hold shift + click to select a range
b92f977
Binary Operators
Aug 8, 2018
40577cb
Binary Operations
Aug 17, 2018
916b695
Binary Operations
Aug 20, 2018
cf20eef
Binary Operations
Aug 21, 2018
144aa49
Binary Operations
Aug 21, 2018
8b228bb
Binary Operations
Aug 21, 2018
3717b4f
Binary Operations
Aug 22, 2018
a24d15f
Binary Operations
Aug 22, 2018
2e384e6
Binary Operations
Aug 23, 2018
a11dc77
Binary Operations
Aug 23, 2018
0097521
Binary Operations
Aug 23, 2018
88d3c45
Binary Operations
Aug 24, 2018
280f52a
Binary Operations
Aug 24, 2018
fc9efab
Binary Operations
Aug 24, 2018
c397a7e
Binary Operations
Aug 24, 2018
1ac172d
Merge branch 'master' into binary-operators-draft
ironbit Aug 28, 2018
335485e
Binary Operations
Aug 28, 2018
d112885
Merge branch 'binary-operators-draft' of github.com:BlazingDB/libgdf …
Aug 28, 2018
6145569
Binary Operations
Aug 30, 2018
d311a81
Binary Operations
Aug 30, 2018
74c0b68
Binary Operations
Aug 31, 2018
d0e4dfe
Binary Operations
Aug 31, 2018
891e2d9
Binary Operations
Sep 1, 2018
3a18c62
Binary Operations
Sep 3, 2018
ba15ada
Binary Operations
Sep 3, 2018
191e549
Binary Operations
Sep 3, 2018
6061920
Binary Operations
Sep 4, 2018
5048f24
Binary Operations
Sep 4, 2018
35f64e5
Added libgdf static library target as optional (disabled by default).
Sep 4, 2018
6667ade
Binary Operations
Sep 4, 2018
34579a4
Changed the name of the static library (libgdf.a).
Sep 4, 2018
55633c9
Minor fixes for LIBGDF_STATIC_LIB and update README.md
aucahuasi Sep 5, 2018
3c359c0
Link gdf against cuda and nvrtc libs
aucahuasi Sep 5, 2018
e427005
Binary Operations
Sep 5, 2018
e00479b
Add cuda runtime libs
aucahuasi Sep 5, 2018
42d24f4
Merge branch 'binary-operators-draft' of https://github.com/BlazingDB…
aucahuasi Sep 5, 2018
ccc3bc9
Merge branch 'master' into binary-operators-draft
Sep 5, 2018
52a7aa4
Updated branch 'binary-operators-draft'
Sep 5, 2018
f08a771
Binary Operations
Sep 5, 2018
5313068
Binary Operations
Sep 6, 2018
b46cd2a
Binary Operations
Sep 6, 2018
b1f5ae2
Binary Operations
Sep 6, 2018
82e99c8
Binary Operations
Sep 6, 2018
6509c76
Binary Operations
Sep 6, 2018
c093441
test solution
aucahuasi Sep 7, 2018
214236c
upgrade py package
aucahuasi Sep 7, 2018
1a9d550
Binary Operations
Sep 18, 2018
29e81ea
Merge branch 'master' into binary-operators-draft
Sep 18, 2018
65d207e
Merge branch 'master' into binary-operators-draft
Sep 19, 2018
f3e7768
Merge branch 'binary-operators-draft' of github.com:BlazingDB/libgdf …
Sep 19, 2018
12a4f5c
Binary Operations
Sep 19, 2018
6073b03
Merge branch 'master' into binary-operators-draft
Sep 19, 2018
47b0905
Binary Operations
Sep 24, 2018
9ba4b73
Binary Operations
Sep 25, 2018
6f6d804
Binary Operations
Sep 25, 2018
96012d9
Binary Operations
Sep 25, 2018
5773a58
Binary Operations
Sep 27, 2018
55c5fd2
Merge branch 'master' into binary-operators-draft
Sep 27, 2018
10dff5d
Binary Operations
Sep 28, 2018
729cbfa
Merge branch 'master' into binary-operators-draft
Sep 28, 2018
a3cbde8
Binary Operations
Sep 29, 2018
59b5ba6
Merge branch 'master' into binary-operators-draft
Sep 29, 2018
36241ec
Merge branch 'master' into binary-operators-draft
Oct 11, 2018
1c521a1
Binary Operations
Oct 17, 2018
b6f5cce
Binary Operations
Oct 17, 2018
a166c7c
Binary Operations
Oct 17, 2018
3a74ff8
new order by api
Oct 22, 2018
f7c3d4f
[binary-operators-draft]: udpate get_column_byte_width to support uns…
aocsa Oct 24, 2018
cc75849
[binary-operator-draft] update gpu_apply_stencil to work with least-s…
aocsa Oct 30, 2018
2c356d0
fixed some issues with binary ops not being able to output if the spa…
Nov 1, 2018
d0a76c1
made a small fix to the sorting code so it gets the size properly, it…
Nov 2, 2018
dfa27b8
removed some couts
Nov 2, 2018
e47aebe
removed some couts
Nov 2, 2018
9722f0e
TODO use a fixed compute capability
aucahuasi Nov 6, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@
[submodule "thirdparty/moderngpu"]
path = thirdparty/moderngpu
url = https://github.com/moderngpu/moderngpu.git
[submodule "thirdparty/jitify"]
path = thirdparty/jitify
url = [email protected]:NVIDIA/jitify.git
9 changes: 9 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,10 @@ else()
endif()

include_directories(
"${CMAKE_CURRENT_SOURCE_DIR}/src"
"${CMAKE_CURRENT_SOURCE_DIR}/include"
"${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/cub"
"${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/jitify"
"${CMAKE_CURRENT_SOURCE_DIR}/thirdparty/moderngpu/src"
"${CUDA_INCLUDE_DIRS}"
"${ARROW_INCLUDEDIR}"
Expand Down Expand Up @@ -107,6 +109,12 @@ if(HT_LEGACY_ALLOCATOR)
endif()


## Binary Operators
add_subdirectory(src/binary)
get_directory_property(gdfs_source_files DIRECTORY ${CMAKE_CURRENT_LIST_DIR}/src/binary DEFINITION gdfs_source_files)
##


cuda_add_library(gdf SHARED
src/binaryops.cu
src/column.cpp
Expand All @@ -121,6 +129,7 @@ cuda_add_library(gdf SHARED
src/segmented_sorting.cu
src/datetimeops.cu
src/sqls_ops.cu
${gdfs_source_files}
)

target_link_libraries(gdf arrow)
Expand Down
19 changes: 18 additions & 1 deletion include/gdf/cffi/functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,8 +321,25 @@ gdf_error gdf_extract_datetime_minute(gdf_column *input, gdf_column *output);
gdf_error gdf_extract_datetime_second(gdf_column *input, gdf_column *output);


/* binary operators */
/**
* Binary Operations
*
* Naming: gdf_binary_operation_<out>_<vax>_<vay>[_d], where each position is
* `v` for a gdf_column argument or `s` for a gdf_scalar argument.
*
* out - column that receives the result.
* vax - first operand (column or scalar, depending on the variant).
* vay - second operand (column or scalar, depending on the variant).
* ope - which gdf_binary_operator to apply.
* def - (only in the `_d` variants) a scalar default value; in the JIT kernels
*       of this change it is used in place of column elements whose validity
*       bit is not set.
*
* Every function returns a gdf_error status code.
*/
gdf_error gdf_binary_operation_v_s_v(gdf_column* out, gdf_scalar* vax, gdf_column* vay, gdf_binary_operator ope);

gdf_error gdf_binary_operation_v_v_s(gdf_column* out, gdf_column* vax, gdf_scalar* vay, gdf_binary_operator ope);

gdf_error gdf_binary_operation_v_v_v(gdf_column* out, gdf_column* vax, gdf_column* vay, gdf_binary_operator ope);


/* Variants that additionally take a default scalar (`def`). */
gdf_error gdf_binary_operation_v_s_v_d(gdf_column* out, gdf_scalar* vax, gdf_column* vay, gdf_scalar* def, gdf_binary_operator ope);

gdf_error gdf_binary_operation_v_v_s_d(gdf_column* out, gdf_column* vax, gdf_scalar* vay, gdf_scalar* def, gdf_binary_operator ope);

gdf_error gdf_binary_operation_v_v_v_d(gdf_column* out, gdf_column* vax, gdf_column* vay, gdf_scalar* def, gdf_binary_operator ope);



/* binary operators */
/* arith */

gdf_error gdf_add_generic(gdf_column *lhs, gdf_column *rhs, gdf_column *output);
Expand Down
49 changes: 49 additions & 0 deletions include/gdf/cffi/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ typedef enum {
GDF_INT16,
GDF_INT32,
GDF_INT64,
GDF_UINT8,
GDF_UINT16,
GDF_UINT32,
GDF_UINT64,
GDF_FLOAT32,
GDF_FLOAT64,
GDF_DATE32, // int32_t days since the UNIX epoch
Expand All @@ -16,6 +20,23 @@ typedef enum {
N_GDF_TYPES, /* additional types should go BEFORE N_GDF_TYPES */
} gdf_dtype;

union gdf_data {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the names in this union required to only be four characters for some reason? It would be nice to spell them out better, especially "invd", "tmst", "dt32" and "dt64".

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There isn't a particular reason. Is there any name style and code style for this project?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Human-readable variable names are good practice for every project.

void* invd;
int8_t si08;
int16_t si16;
int32_t si32;
int64_t si64;
uint8_t ui08;
uint16_t ui16;
uint32_t ui32;
uint64_t ui64;
float fp32;
double fp64;
int32_t dt32; // GDF_DATE32
int64_t dt64; // GDF_DATE64
int64_t tmst; // GDF_TIMESTAMP
};

typedef enum {
GDF_SUCCESS=0,
GDF_CUDA_ERROR,
Expand Down Expand Up @@ -47,6 +68,11 @@ typedef struct {
// here we can also hold info for decimal datatype or any other datatype that requires additional information
} gdf_dtype_extra_info;

/* A single typed value: a gdf_data payload paired with the gdf_dtype describing it. */
struct gdf_scalar {
gdf_data data; // value storage (union gdf_data)
gdf_dtype dtype; // presumably identifies which member of `data` is meaningful -- TODO confirm
};

typedef struct gdf_column_{
void *data;
gdf_valid_type *valid;
Expand All @@ -71,6 +97,29 @@ typedef enum {
N_GDF_AGG_OPS, /* additional aggregation ops should go BEFORE N_GDF_... */
} gdf_agg_op;


enum gdf_binary_operator {
GDF_ADD,
GDF_SUB,
GDF_MUL,
GDF_DIV,
GDF_TRUE_DIV,
GDF_FLOOR_DIV,
GDF_MOD,
GDF_POW,
//GDF_COMBINE,
//GDF_COMBINE_FIRST,
//GDF_ROUND,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that it makes sense to have intermediate parts of an enum commented out.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can lead to API instabilities.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm so sorry, I hadn't seen your message.
It is used only as an enum (not casting to integers).
It could be error-prone. In any case, it will be moved to the end.

GDF_EQUAL,
GDF_NOT_EQUAL,
GDF_LESS,
GDF_GREATER,
GDF_LESS_EQUAL,
GDF_GREATER_EQUAL,
//GDF_PRODUCT,
//GDF_DOT
};

/* additional flags */
typedef struct gdf_context_{
int flag_sorted; /* 0 = No, 1 = yes */
Expand Down
29 changes: 29 additions & 0 deletions src/binary/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Selects the source files of the binary-operation implementation to build.
#
# Usage:
#   cmake -DCMAKE_BUILD_TYPE=Release -DBINARY_OPERATION_VERSION:STRING=V1 ../../code/libgdf
#
# BINARY_OPERATION_VERSION may be "V1" or "V2". The chosen files are appended
# to `gdfs_source_files`, which the parent CMakeLists.txt reads back with
# get_directory_property(... DEFINITION gdfs_source_files).

if (NOT DEFINED BINARY_OPERATION_VERSION)
    # No version requested: contribute no sources.
    list(APPEND gdfs_source_files "")
    return()
endif()


# The variable is referenced by name (not as ${...}) so that an empty value
# cannot expand into a malformed `if( STREQUAL "V1")` condition.
if (BINARY_OPERATION_VERSION STREQUAL "V1")
    message("BINARY_OPERATION_VERSION: V1 Selected")
    list(APPEND gdfs_source_files
        "${CMAKE_CURRENT_LIST_DIR}/common/types.cpp"
        "${CMAKE_CURRENT_LIST_DIR}/common/mediator.cu"
    )
elseif (BINARY_OPERATION_VERSION STREQUAL "V2")
    message("BINARY_OPERATION_VERSION: V2 Selected")
    list(APPEND gdfs_source_files
        "${CMAKE_CURRENT_LIST_DIR}/binary2/binary.cpp"
        "${CMAKE_CURRENT_LIST_DIR}/binary2/kernel_gdf_data.cpp"
        "${CMAKE_CURRENT_LIST_DIR}/binary2/kernel.cpp"
        "${CMAKE_CURRENT_LIST_DIR}/binary2/launcher.cpp"
        "${CMAKE_CURRENT_LIST_DIR}/binary2/operation.cpp"
        "${CMAKE_CURRENT_LIST_DIR}/binary2/traits.cpp"
        "${CMAKE_CURRENT_LIST_DIR}/binary2/type.cpp"
    )
else()
    # Previously an unrecognised value was ignored silently; keep adding no
    # sources but tell the user why.
    message(WARNING "BINARY_OPERATION_VERSION: unknown value '${BINARY_OPERATION_VERSION}' (expected V1 or V2); no binary-operation sources added")
endif()
61 changes: 61 additions & 0 deletions src/binary/binary2/binary.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#include "gdf/gdf.h"
#include "binary/binary2/launcher.h"

// Internal overloads behind the public gdf_binary_operation_* C functions.
// Each overload names a kernel template by string, asks the Launcher to
// instantiate it for the given arguments and operator, and launches it.
// NOTE(review): nothing returned by the Launcher chain is inspected here, so
// every overload reports GDF_SUCCESS unconditionally -- confirm how launch or
// compilation failures are meant to be surfaced.
namespace gdf {
// column <ope> scalar -> column, using the "kernel_v_s" template.
gdf_error binary_operation(gdf_column* out, gdf_column* vax, gdf_scalar* vay, gdf_binary_operator ope) {
gdf::Launcher::launch().kernel("kernel_v_s")
.instantiate(out, vax, vay, ope)
.launch(out, vax, vay);

return GDF_SUCCESS;
}

// column <ope> column -> column, using the "kernel_v_v" template.
gdf_error binary_operation(gdf_column* out, gdf_column* vax, gdf_column* vay, gdf_binary_operator ope) {
gdf::Launcher::launch().kernel("kernel_v_v")
.instantiate(out, vax, vay, ope)
.launch(out, vax, vay);

return GDF_SUCCESS;
}

// column <ope> scalar -> column with a default scalar `def`, using the
// "kernel_v_s_d" template.
gdf_error binary_operation(gdf_column* out, gdf_column* vax, gdf_scalar* vay, gdf_scalar* def, gdf_binary_operator ope) {
gdf::Launcher::launch().kernel("kernel_v_s_d")
.instantiate(out, vax, vay, def, ope)
.launch(out, vax, vay, def);

return GDF_SUCCESS;
}

// column <ope> column -> column with a default scalar `def`, using the
// "kernel_v_v_d" template.
gdf_error binary_operation(gdf_column* out, gdf_column* vax, gdf_column* vay, gdf_scalar* def, gdf_binary_operator ope) {
gdf::Launcher::launch().kernel("kernel_v_v_d")
.instantiate(out, vax, vay, def, ope)
.launch(out, vax, vay, def);

return GDF_SUCCESS;
}
}


// C entry point for "scalar, column -> column".
// The internal overload takes the column before the scalar, so the two
// operands are handed over in swapped order.
// NOTE(review): the swap looks like it changes the operand order for
// non-commutative operators (e.g. GDF_SUB, GDF_DIV) -- confirm.
gdf_error gdf_binary_operation_v_s_v(gdf_column* out, gdf_scalar* vax, gdf_column* vay, gdf_binary_operator ope) {
    const gdf_error status = gdf::binary_operation(out, vay, vax, ope);
    return status;
}

// C entry point for "column, scalar -> column"; forwards unchanged to the
// column/scalar overload of gdf::binary_operation.
gdf_error gdf_binary_operation_v_v_s(gdf_column* out, gdf_column* vax, gdf_scalar* vay, gdf_binary_operator ope) {
    const gdf_error status = gdf::binary_operation(out, vax, vay, ope);
    return status;
}

// C entry point for "column, column -> column"; forwards unchanged to the
// column/column overload of gdf::binary_operation.
gdf_error gdf_binary_operation_v_v_v(gdf_column* out, gdf_column* vax, gdf_column* vay, gdf_binary_operator ope) {
    const gdf_error status = gdf::binary_operation(out, vax, vay, ope);
    return status;
}

// C entry point for "scalar, column -> column" with a default scalar `def`.
// The internal overload takes the column before the scalar, so the two
// operands are handed over in swapped order.
// NOTE(review): the swap looks like it changes the operand order for
// non-commutative operators (e.g. GDF_SUB, GDF_DIV) -- confirm.
gdf_error gdf_binary_operation_v_s_v_d(gdf_column* out, gdf_scalar* vax, gdf_column* vay, gdf_scalar* def, gdf_binary_operator ope) {
    const gdf_error status = gdf::binary_operation(out, vay, vax, def, ope);
    return status;
}

// C entry point for "column, scalar -> column" with a default scalar `def`;
// forwards unchanged to the matching gdf::binary_operation overload.
gdf_error gdf_binary_operation_v_v_s_d(gdf_column* out, gdf_column* vax, gdf_scalar* vay, gdf_scalar* def, gdf_binary_operator ope) {
    const gdf_error status = gdf::binary_operation(out, vax, vay, def, ope);
    return status;
}

// C entry point for "column, column -> column" with a default scalar `def`;
// forwards unchanged to the matching gdf::binary_operation overload.
gdf_error gdf_binary_operation_v_v_v_d(gdf_column* out, gdf_column* vax, gdf_column* vay, gdf_scalar* def, gdf_binary_operator ope) {
    const gdf_error status = gdf::binary_operation(out, vax, vay, def, ope);
    return status;
}
15 changes: 15 additions & 0 deletions src/binary/binary2/cuda.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#ifndef GDF_BINARY_CUDA_H
#define GDF_BINARY_CUDA_H

// Declarations of C strings defined in other translation units of the
// binary2 implementation.
namespace gdf {
namespace cuda {

// Source text of the CUDA kernel templates (defined as a raw string in kernel.cpp).
extern const char* kernel;
// Presumably the source text served as "traits.h" to the kernel source -- TODO confirm.
extern const char* traits;
// Presumably the source text served as "operation.h" to the kernel source -- TODO confirm.
extern const char* operation;
// Presumably the source text served as "kernel_gdf_data.h" to the kernel source -- TODO confirm.
extern const char* kernel_gdf_data;

}
}

#endif
141 changes: 141 additions & 0 deletions src/binary/binary2/kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
namespace gdf {
namespace cuda {

const char* kernel =
R"***(
#include <cstdint>
#include "traits.h"
#include "operation.h"
#include "kernel_gdf_data.h"

#define WARP_SIZE 32
#define WARP_MASK 0xFFFFFFFF

// Returns the validity word containing element `tid`, ANDed with `mask`.
// The result is non-zero only if a bit selected by `mask` is set in that word.
__device__ __forceinline__
uint32_t isValid(int tid, uint32_t* valid, uint32_t mask) {
    const uint32_t word = valid[tid / WARP_SIZE];
    return word & mask;
}

// Warp-wide additive reduction of `mask` through __shfl_down_sync: at each
// step a lane adds the value held by the lane `delta` positions above it
// (16, 8, 4, 2, 1). After the last step lane 0 holds the sum of all lanes'
// original values; the values left in the other lanes are partial sums.
// All lanes named by WARP_MASK must call this together.
__device__ __forceinline__
void shiftMask(uint32_t& mask) {
    #pragma unroll
    for (int delta = WARP_SIZE / 2; delta >= 1; delta >>= 1) {
        const uint32_t neighbour = __shfl_down_sync(WARP_MASK, mask, delta);
        mask = mask + neighbour;
    }
}

template <typename TypeOut, typename TypeVax, typename TypeVay, typename TypeOpe>
__global__
void kernel_v_s(int size, TypeOut* out_data, TypeVax* vax_data, gdf_data vay_data) {
harrism marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need kernel_s_v as well - should be 6 combinations overall, one kernel per the top level GDF function

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implemented the remaining scalar-vector operations.

int tid = threadIdx.x;
int blkid = blockIdx.x;
int blksz = blockDim.x;
int gridsz = gridDim.x;

int start = tid + blkid * blksz;
int step = blksz * gridsz;

for (int i=start; i<size; i+=step) {
AbstractOperation<TypeOpe> operation;
out_data[i] = operation.template operate<TypeOut, TypeVax, TypeVay>(vax_data[i], (TypeVay)vay_data);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to set the output valid bit mask - I don't see it being handled in the code. It should be an OR between the two bit masks of the two input operands.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The output valid bitmask is processed in all kernels.
The kernels have also been optimized using 'Bit Hacks'. In the benchmarks, the kernel (v_v_v_d) reduces its time by approx. 5 us in every case, while the kernel (v_v_v) increases its time by approx. 7 us due to the bitmask processing.
https://github.com/BlazingDB/libgdf/blob/729cbfac6ae2281894b99644c58643184bd18d85/src/binary-operation/jit/code/kernel.cpp#L82

}
}

// Element-wise binary operation over two columns:
//     out_data[i] = TypeOpe(vax_data[i], vay_data[i])   for 0 <= i < size
// Work is distributed with a grid-stride loop, so any 1-D launch
// configuration covers the whole range. Only the data buffers are touched;
// this kernel neither reads nor writes validity bitmasks.
template <typename TypeOut, typename TypeVax, typename TypeVay, typename TypeOpe>
__global__
void kernel_v_v(int size, TypeOut* out_data, TypeVax* vax_data, TypeVay* vay_data) {
    const int first = threadIdx.x + blockIdx.x * blockDim.x;
    const int stride = blockDim.x * gridDim.x;

    int idx = first;
    while (idx < size) {
        AbstractOperation<TypeOpe> operation;
        out_data[idx] = operation.template operate<TypeOut, TypeVax, TypeVay>(vax_data[idx], vay_data[idx]);
        idx += stride;
    }
}

template <typename TypeOut, typename TypeVax, typename TypeVay, typename TypeDef, typename TypeOpe>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wow, TypeDef is a very risky typename, considering its proximity to typedef. Could just use TDef, TVax, etc. Or reorder, putting Type last in the names.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem, it will be changed.

__global__
void kernel_v_s_d(int size, gdf_data def_data,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should gdf_data on this line be TypeDef?

TypeOut* out_data, TypeVax* vax_data, gdf_data vay_data,
uint32_t* out_valid, uint32_t* vax_valid) {
int tid = threadIdx.x;
int blkid = blockIdx.x;
int blksz = blockDim.x;
int gridsz = gridDim.x;

int start = tid + blkid * blksz;
int step = blksz * gridsz;

for (int i=start; i<size; i+=step) {
uint32_t mask = 1 << (i % WARP_SIZE);
uint32_t is_vax_valid = isValid(i, vax_valid, mask);

TypeVax vax_data_aux = vax_data[i];
if ((is_vax_valid & mask) != mask) {
vax_data_aux = (TypeDef)def_data;
}

AbstractOperation<TypeOpe> operation;
out_data[i] = operation.template operate<TypeOut, TypeVax, TypeVay>(vax_data_aux, (TypeVay)vay_data);

__syncwarp();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the reason for __syncwarp() here? I don't see any sharing of data between threads. Is it necessary? Same question for all the other __syncwarp() calls.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not correct. It'll be removed.


shiftMask(mask);

if ((i % WARP_SIZE) == 0) {
out_valid[i / WARP_SIZE] = mask;
}
}
}


// Element-wise binary operation over two columns with a default value.
//
// For each i in [0, size):
//   - if exactly one operand is null (its validity bit is unset), that operand
//     is replaced by `def_data` and the result is computed and marked valid;
//   - if both operands are valid, the result is computed and marked valid;
//   - if both operands are null, out_data[i] is left untouched and the
//     output validity bit is cleared.
//
// Parameters:
//   size       number of elements in each column
//   def_data   default value, converted to TypeDef before use
//   out_data / vax_data / vay_data     element buffers
//   out_valid / vax_valid / vay_valid  validity bitmasks, one bit per element,
//                                      32 elements per uint32_t word
//
// Launch requirements: 1-D grid and block. blockDim.x is assumed to be a
// multiple of WARP_SIZE so that (i % WARP_SIZE) equals the lane id and each
// warp owns exactly one validity word per iteration -- TODO confirm against
// the launcher configuration.
template <typename TypeOut, typename TypeVax, typename TypeVay, typename TypeDef, typename TypeOpe>
__global__
void kernel_v_v_d(int size, gdf_data def_data,
                  TypeOut* out_data, TypeVax* vax_data, TypeVay* vay_data,
                  uint32_t* out_valid, uint32_t* vax_valid, uint32_t* vay_valid) {
    int start = threadIdx.x + blockIdx.x * blockDim.x;
    int step = blockDim.x * gridDim.x;

    // The loop condition tests the first index of the 32-element word that
    // contains i, not i itself. All lanes of a warp therefore run the same
    // number of iterations, including the tail where size is not a multiple
    // of WARP_SIZE, and every lane named in WARP_MASK reaches the warp vote
    // below. Lanes past the end are guarded by `i < size`.
    for (int i = start; (i - (i % WARP_SIZE)) < size; i += step) {
        bool out_is_valid = false;

        if (i < size) {
            // Unsigned literal: lane 31 must not shift into a sign bit.
            uint32_t mask = 1u << (i % WARP_SIZE);
            uint32_t is_vax_valid = isValid(i, vax_valid, mask);
            uint32_t is_vay_valid = isValid(i, vay_valid, mask);

            TypeVax vax_data_aux = vax_data[i];
            TypeVay vay_data_aux = vay_data[i];
            if (is_vax_valid != mask) {
                vax_data_aux = (TypeDef)def_data;
            }
            else if (is_vay_valid != mask) {
                vay_data_aux = (TypeDef)def_data;
            }

            // At least one operand was valid: compute and mark the result valid.
            if ((is_vax_valid | is_vay_valid) == mask) {
                AbstractOperation<TypeOpe> operation;
                out_data[i] = operation.template operate<TypeOut, TypeVax, TypeVay>(vax_data_aux, vay_data_aux);
                out_is_valid = true;
            }
        }

        // Bit n of the vote result is lane n's predicate, which is exactly
        // the layout of the validity word. Out-of-range lanes contribute 0.
        // The vote synchronizes the warp itself, so no __syncwarp() is needed.
        uint32_t out_word = __ballot_sync(WARP_MASK, out_is_valid);

        // The lane holding the first element of the word stores it.
        if ((i % WARP_SIZE) == 0) {
            out_valid[i / WARP_SIZE] = out_word;
        }
    }
}
)***";
}
}
Loading