Skip to content
This repository has been archived by the owner on Dec 21, 2018. It is now read-only.

Commit

Permalink
new order by api
Browse files Browse the repository at this point in the history
  • Loading branch information
Felipe Aramburu committed Oct 22, 2018
1 parent a166c7c commit 3a74ff8
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 2 deletions.
8 changes: 8 additions & 0 deletions include/gdf/cffi/functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -884,3 +884,11 @@ gdf_error gdf_quantile_aprrox( gdf_column* col_in, //input column;
double q, //requested quantile in [0,1]
void* t_erased_res, //type-erased result of same type as column;
gdf_context* ctxt); //context info



This comment has been minimized.

Copy link
@harrism

harrism Oct 22, 2018

Member

Please provide usage documentation. See this PR for comment style guidelines. #137

/**
 * @brief Computes the row ordering that sorts a table by several columns, where
 *        each column can independently be sorted ascending or descending.
 *
 * Columns are compared in the order given: the first column is the primary key
 * and each following column only breaks ties left by the previous ones. The
 * number of rows is taken from the first input column.
 *
 * @param input_columns     columns to sort by, accessed as input_columns[i]
 * @param num_inputs        number of columns in input_columns
 * @param output_indices    pre-allocated column; its data buffer is filled with
 *                          the sorted row indices, written as size_t values
 * @param asc_desc_bitmask  one bit per input column; a set bit sorts that column
 *                          ascending, a clear bit descending. A null pointer
 *                          sorts every column ascending.
 *                          NOTE(review): the mask is read by the comparator that
 *                          runs inside the GPU sort, so it presumably has to be
 *                          device-accessible memory - confirm with callers.
 *
 * @return gdf_error status code; GDF_SUCCESS on success
 */
gdf_error gdf_order_by_asc_desc(
gdf_column * input_columns, //array of the input columns to sort by
size_t num_inputs, //number of columns in the first parameter (i.e. number of columns to sort by)
gdf_column * output_indices, //a gdf_column that is pre-allocated for storing the sorted indices
gdf_valid_type * asc_desc_bitmask); //asc/desc bitmask, e.g. 101 would mean sort the first and last columns ascending and the second one descending
117 changes: 115 additions & 2 deletions include/sqls_rtti_comp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
#include <thrust/advance.h>
#include <thrust/gather.h>

#include "gdf/utils.h"

//for int<n>_t:
//
#include <cstdint>
Expand All @@ -49,7 +51,8 @@ struct LesserRTTI
columns_(cols),
rtti_(types),
sz_(sz),
vals_(nullptr)
vals_(nullptr),
asc_desc_bitmask_(nullptr)
{
}

Expand All @@ -61,11 +64,65 @@ struct LesserRTTI
columns_(cols),
rtti_(types),
sz_(sz),
vals_(vals)
vals_(vals),
asc_desc_bitmask_(nullptr)
{
}

__host__ __device__
LesserRTTI(void* const* cols,
int* const types,
size_t sz,
gdf_valid_type* const asc_desc_bitmask):
columns_(cols),
rtti_(types),
sz_(sz),
vals_(nullptr),
asc_desc_bitmask_(asc_desc_bitmask)
{
}

/**
 * Ordering predicate for sorting rows by several columns, each with its own
 * ascending / descending flag.
 *
 * Columns are visited in index order. For every column the direction is read
 * from asc_desc_bitmask_ (bit set => ascending, bit clear => descending; a null
 * bitmask means every column is ascending). The first column on which the two
 * rows differ decides the result; rows that tie on every column are reported
 * as "not before" (false).
 *
 * Returns true when row1 must be ordered before row2.
 */
__host__ __device__
bool asc_desc_comparison(IndexT row1, IndexT row2) const
{
    for(size_t key = 0; key < sz_; ++key)
    {
        gdf_dtype key_type = static_cast<gdf_dtype>(rtti_[key]);

        //without a bitmask everything is ascending; otherwise bit `key` decides
        const bool ascending = (asc_desc_bitmask_ == nullptr)
                            || gdf_is_valid(asc_desc_bitmask_, key);

        State outcome;
        if(!ascending){
            OpGreater descending_cmp(row1, row2);
            outcome = type_dispatcher(descending_cmp, key_type, key);
        }else{
            OpLess ascending_cmp(row1, row2);
            outcome = type_dispatcher(ascending_cmp, key_type, key);
        }

        if(outcome == State::True)
            return true;
        if(outcome == State::False)
            return false;
        //State::Undecided: the rows tie on this column, let the next one decide
    }
    return false;
}

__host__ __device__
bool equal(IndexT row1, IndexT row2) const
Expand Down Expand Up @@ -187,6 +244,36 @@ struct LesserRTTI
IndexT row2_;
};

/**
 * Per-column "greater than" comparison between two fixed rows; the descending
 * counterpart of OpLess.
 *
 * Calling it for a column yields State::True when the value at row1 is greater
 * than the value at row2, State::Undecided when they are equal, and
 * State::False otherwise.
 */
struct OpGreater
{
    __host__ __device__
    OpGreater(IndexT row1, IndexT row2):
        row1_(row1),
        row2_(row2)
    {
    }

    //The unnamed ColType argument only selects the element type to read.
    template<typename ColType>
    __host__ __device__
    State operator() (int col_index,
                      const void* const * columns,
                      ColType )
    {
        ColType lhs = LesserRTTI::at<ColType>(col_index, row1_, columns);
        ColType rhs = LesserRTTI::at<ColType>(col_index, row2_, columns);

        //a tie leaves the decision to the next column
        if( lhs == rhs )
            return State::Undecided;

        return ( lhs > rhs ) ? State::True : State::False;
    }
private:
    IndexT row1_;
    IndexT row2_;
};

struct OpEqual
{
__host__ __device__
Expand Down Expand Up @@ -336,6 +423,7 @@ struct LesserRTTI
const int* const rtti_;
size_t sz_;
const void* const * vals_; //for filtering
const gdf_valid_type* asc_desc_bitmask_; //a bitmask that allows us to know whether or not a column should be sorted ascending or descending
};

//###########################################################################
Expand Down Expand Up @@ -379,6 +467,8 @@ void multi_col_order_by(size_t nrows,
});
}



//###########################################################################
//# Multi-column Filter: #
//###########################################################################
Expand Down Expand Up @@ -709,4 +799,27 @@ size_t multi_col_group_by_avg_sort(size_t nrows,
return new_sz;
}

#include <iostream>
/**
 * Multi-column order-by with a per-column ascending/descending flag.
 *
 * Fills d_indx with the row indices 0..nrows-1 and then sorts those indices so
 * that they enumerate the rows in sorted order. Rows are compared with
 * LesserRTTI::asc_desc_comparison.
 *
 * d_col_data       - array of num_inputs column data pointers
 * d_col_types      - array of num_inputs column type ids
 * num_inputs       - number of columns to sort by
 * asc_desc_bitmask - per-column direction bits handed to the comparator
 *                    (may be null, see LesserRTTI::asc_desc_comparison)
 * d_indx           - output buffer holding nrows indices
 * nrows            - number of rows
 * stream           - CUDA stream both thrust calls are issued on
 *
 * All pointer arguments are dereferenced from algorithms running under the
 * thrust CUDA execution policy.
 */
template<typename IndexT>
void multi_col_order_by_asc_desc(
    void* const * d_col_data,
    int* d_col_types,
    size_t num_inputs,
    gdf_valid_type * asc_desc_bitmask,
    IndexT* d_indx,
    size_t nrows,
    cudaStream_t stream = NULL){

    LesserRTTI<IndexT> f(d_col_data, d_col_types, num_inputs, asc_desc_bitmask);

    //Seed with an IndexT-typed zero so the generated indices are of the index
    //type rather than int.
    thrust::sequence(thrust::cuda::par.on(stream),
                     d_indx, d_indx+nrows,
                     static_cast<IndexT>(0));

    thrust::sort(thrust::cuda::par.on(stream),
                 d_indx, d_indx+nrows,
                 [f] __host__ __device__ (IndexT i1, IndexT i2){
                     return f.asc_desc_comparison(i1, i2);
                 });
}

55 changes: 55 additions & 0 deletions src/sqls_ops.cu
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,23 @@ namespace{ //annonymus
cudaMemcpy(d_types, h_types, ncols*sizeof(int), cudaMemcpyHostToDevice);//TODO: add streams
}

//Does not assume the columns are contiguous in memory: `cols` is an array of
//pointers to columns.
//
//Collects the data pointer and dtype of each of the ncols columns on the host
//and copies them into the destination arrays d_cols / d_types with
//cudaMemcpyHostToDevice; each destination must have room for ncols entries.
//NOTE: the cudaMemcpy status codes are not checked (the function returns void).
void soa_col_info_arr(gdf_column** cols, size_t ncols, void** d_cols, int* d_types)
{
    //nothing to gather or copy
    if(ncols == 0)
    {
        return;
    }

    std::vector<void*> v_cols(ncols,nullptr);
    std::vector<int> v_types(ncols, 0);
    for(size_t i=0;i<ncols;++i)
    {
        v_cols[i] = cols[i]->data;
        v_types[i] = cols[i]->dtype;
    }

    //data() instead of &v[0]: no element access is needed to get the buffer
    cudaMemcpy(d_cols, v_cols.data(), ncols*sizeof(void*), cudaMemcpyHostToDevice);//TODO: add streams
    cudaMemcpy(d_types, v_types.data(), ncols*sizeof(int), cudaMemcpyHostToDevice);//TODO: add streams
}

//Shorthand for a thrust::device_vector holding elements of type T.
template<typename T>
using Vector = thrust::device_vector<T>;

Expand Down Expand Up @@ -1375,3 +1392,41 @@ gdf_error gdf_group_by_count(int ncols, // # columns
}


/**
 * Sorts by several columns with a per-column ascending/descending flag and
 * writes the resulting row order into output_indices.
 *
 * input_columns    - array of num_inputs columns to sort by; the row count is
 *                    taken from input_columns[0].size
 * num_inputs       - number of columns in input_columns
 * output_indices   - pre-allocated column whose data buffer receives the
 *                    sorted row indices as size_t values; must not be null
 * asc_desc_bitmask - one bit per column, set = ascending, clear = descending;
 *                    null sorts every column ascending. It is passed unchanged
 *                    to the comparator used by the GPU sort.
 *
 * Returns GDF_DATASET_EMPTY when there are no input columns, GDF_SUCCESS
 * otherwise.
 */
gdf_error gdf_order_by_asc_desc(
    gdf_column * input_columns,
    size_t num_inputs,
    gdf_column * output_indices,
    gdf_valid_type * asc_desc_bitmask){

    //Without at least one column there is nothing to sort by, and reading
    //input_columns[0] below would be out of bounds.
    if(input_columns == nullptr || num_inputs == 0){
        return GDF_DATASET_EMPTY;
    }

    //TODO: don't assume type of output is size_t
    typedef size_t IndexT;

    //TODO: make these allocations happen with the new memory manager when we can
    //also we are kind of assuming they will just work, yeesh!
    thrust::device_vector<void*> d_cols(num_inputs);
    thrust::device_vector<int> d_types(num_inputs, 0);

    void** d_col_data = d_cols.data().get();
    int* d_col_types = d_types.data().get();

    //gather the per-column data pointers and dtypes into the device arrays
    soa_col_info(input_columns, num_inputs, d_col_data, d_col_types);

    multi_col_order_by_asc_desc(
        d_col_data,
        d_col_types,
        num_inputs,
        asc_desc_bitmask,
        (IndexT *) output_indices->data,
        input_columns[0].size);

    return GDF_SUCCESS;

}


0 comments on commit 3a74ff8

Please sign in to comment.