Skip to content

Commit

Permalink
refactored benchmark; retrieve max key range
Browse files Browse the repository at this point in the history
  • Loading branch information
Funatiq committed Oct 5, 2020
1 parent 30a76d7 commit db3a0b4
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 42 deletions.
35 changes: 14 additions & 21 deletions benchmark/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,36 +10,29 @@ NVCCFLAGS := -std=$(STD) $(NVCCGENCODE) --expt-extended-lambda --expt-relaxed-co
INCS := $(foreach dir, $(INCDIRS), $(wildcard $(dir)/*.cuh $(dir)/*.h $(dir)/*.hpp))
INCPARAMS := $(addprefix -I, $(INCDIRS))

all: single_value multi_value multi_bucket counting bloom_filter bucket_list

single_value: bin/single_value_benchmark.out

bin/single_value_benchmark.out: ${INCS} | bin
all: bin/single_value_benchmark.out \
bin/multi_value_benchmark.out \
bin/multi_bucket_benchmark.out \
bin/counting_benchmark.out \
bin/bloom_filter_benchmark.out \
bin/bucket_list_benchmark.out

bin/single_value_benchmark.out: src/single_value_benchmark.cu src/common.cuh ${INCS} | bin
$(NVCC) $(NVCCFLAGS) $(INCPARAMS) src/single_value_benchmark.cu -o bin/single_value_benchmark.out

multi_value: bin/multi_value_benchmark.out

bin/multi_value_benchmark.out: ${INCS} | bin
bin/multi_value_benchmark.out: src/multi_value_benchmark.cu src/common.cuh ${INCS} | bin
$(NVCC) $(NVCCFLAGS) $(INCPARAMS) src/multi_value_benchmark.cu -o bin/multi_value_benchmark.out

multi_bucket: bin/multi_bucket_benchmark.out

bin/multi_bucket_benchmark.out: ${INCS} | bin
bin/multi_bucket_benchmark.out: src/multi_bucket_benchmark.cu src/common.cuh ${INCS} | bin
$(NVCC) $(NVCCFLAGS) $(INCPARAMS) src/multi_bucket_benchmark.cu -o bin/multi_bucket_benchmark.out

counting: bin/counting_benchmark.out

bin/counting_benchmark.out: ${INCS} | bin
bin/counting_benchmark.out: src/counting_benchmark.cu src/common.cuh ${INCS} | bin
$(NVCC) $(NVCCFLAGS) $(INCPARAMS) src/counting_benchmark.cu -o bin/counting_benchmark.out

bloom_filter: bin/bloom_filter_benchmark.out

bin/bloom_filter_benchmark.out: ${INCS} | bin
bin/bloom_filter_benchmark.out: src/bloom_filter_benchmark.cu src/common.cuh ${INCS} | bin
$(NVCC) $(NVCCFLAGS) $(INCPARAMS) src/bloom_filter_benchmark.cu -o bin/bloom_filter_benchmark.out

bucket_list: bin/bucket_list_benchmark.out

bin/bucket_list_benchmark.out: ${INCS} | bin
bin/bucket_list_benchmark.out: src/bucket_list_benchmark.cu src/common.cuh ${INCS} | bin
$(NVCC) $(NVCCFLAGS) $(INCPARAMS) src/bucket_list_benchmark.cu -o bin/bucket_list_benchmark.out

debug: OPT := 0
Expand All @@ -57,4 +50,4 @@ clean:
bin:
mkdir -p $@

.PHONY: clean all bin multi_value multi_bucket counting bloom_filter bucket_list
.PHONY: clean all debug profile
18 changes: 13 additions & 5 deletions benchmark/src/bucket_list_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ void bucket_list_benchmark(
const uint64_t key_store_capacity = max_unique_size / key_load_factor;
const uint64_t value_store_capacity = max_keys / value_load_factor;

key_t* unique_keys_d = nullptr;
cudaMalloc(&unique_keys_d, sizeof(key_t)*max_unique_size); CUERR
key_t* query_keys_d = nullptr;
cudaMalloc(&query_keys_d, sizeof(key_t)*max_keys); CUERR
value_t* values_d = nullptr;
cudaMalloc(&values_d, sizeof(value_t)*max_keys); CUERR
index_t * offsets_d = nullptr;
Expand Down Expand Up @@ -98,9 +98,14 @@ void bucket_list_benchmark(
iters, thermal_backoff);

output.query_ms = benchmark_query_multi(
hash_table, unique_keys_d, offsets_d, values_d,
hash_table, query_keys_d, size,
offsets_d, values_d,
iters, thermal_backoff);

// output.query_ms = benchmark_query_unique(
// hash_table, query_keys_d, offsets_d, values_d,
// iters, thermal_backoff);

output.key_load_factor = hash_table.key_load_factor();
output.value_load_factor = hash_table.value_load_factor();
output.density = hash_table.storage_density();
Expand All @@ -120,7 +125,7 @@ void bucket_list_benchmark(
}
}

cudaFree(unique_keys_d); CUERR
cudaFree(query_keys_d); CUERR
cudaFree(values_d); CUERR
cudaFree(offsets_d); CUERR
}
Expand All @@ -134,6 +139,8 @@ int main(int argc, char* argv[])

const uint64_t max_keys = 1UL << 27;

const bool print_headers = true;

uint64_t dev_id = 0;
if(argc > 2) dev_id = std::atoi(argv[2]);
cudaSetDevice(dev_id); CUERR
Expand All @@ -158,7 +165,8 @@ int main(int argc, char* argv[])
0.50,
{max_keys},
{{1.1, 1, 0}},
0x5ad0ded);
0x5ad0ded,
print_headers);

cudaFree(keys_d); CUERR
}
52 changes: 52 additions & 0 deletions benchmark/src/common.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,58 @@ float benchmark_query(

// Times hash_table.retrieve() for a dense range of query keys and returns the
// fastest observed run in milliseconds.
//
// The first `size` entries of keys_d are overwritten with the keys 1..size
// (keys_d[i] = i + 1) before the measurement, so the caller's previous
// contents of keys_d are lost.
//
// Parameters:
//   hash_table      - table to query; must provide retrieve() with the
//                     argument list used below.
//   keys_d          - device buffer with room for at least `size` keys.
//   size            - number of keys to generate and query.
//   offsets_d       - device buffer passed to retrieve() as both `offsets_d`
//                     and `offsets_d + 1`.
//                     NOTE(review): presumably begin/end offsets, which would
//                     require size + 1 entries - confirm against retrieve().
//   values_d        - device buffer receiving the retrieved values.
//   iters           - number of timed repetitions.
//   thermal_backoff - host-side sleep after every repetition.
//
// Returns the minimum elapsed time over all repetitions as reported by
// cudaEventElapsedTime, or 0 if iters == 0 (nothing was measured).
template<class HashTable>
float benchmark_query_multi(
    HashTable& hash_table,
    typename HashTable::key_type * keys_d,
    const uint64_t size,
    typename HashTable::index_type * offsets_d,
    typename HashTable::value_type * values_d,
    const uint8_t iters,
    const std::chrono::milliseconds thermal_backoff)
{
    using index_t = typename HashTable::index_type;

    // generate the query keys 1..size directly on the device
    helpers::lambda_kernel
    <<<SDIV(size, 1024), 1024>>>
    ([=] DEVICEQUALIFIER
    {
        // widen before multiplying: blockDim.x * blockIdx.x is a 32-bit
        // product and would wrap for very large launches
        const uint64_t tid =
            uint64_t(blockDim.x) * uint64_t(blockIdx.x) + threadIdx.x;

        if(tid < size)
        {
            keys_d[tid] = tid + 1;
        }
    });
    cudaDeviceSynchronize(); CUERR

    index_t value_size_out = 0;

    // one pair of events is reused for every repetition and released below;
    // creating a fresh pair per iteration without destroying it leaks handles
    cudaEvent_t query_start, query_stop;
    cudaEventCreate(&query_start);
    cudaEventCreate(&query_stop); CUERR

    std::vector<float> query_times(iters);
    for(uint64_t i = 0; i < iters; i++)
    {
        float t;
        cudaEventRecord(query_start, 0);
        hash_table.retrieve(
            keys_d,
            size,
            offsets_d,
            offsets_d+1,
            values_d,
            value_size_out);
        cudaEventRecord(query_stop, 0);
        cudaEventSynchronize(query_stop);
        cudaEventElapsedTime(&t, query_start, query_stop);
        cudaDeviceSynchronize(); CUERR
        query_times[i] = t;
        // let the device cool down so repetitions do not throttle each other
        std::this_thread::sleep_for(thermal_backoff);
    }

    cudaEventDestroy(query_start);
    cudaEventDestroy(query_stop); CUERR

    // min_element returns end() for an empty range; never dereference that
    if(query_times.empty())
    {
        return 0.0f;
    }
    return *std::min_element(query_times.begin(), query_times.end());
}

template<class HashTable>
float benchmark_query_unique(
HashTable& hash_table,
typename HashTable::key_type * unique_keys_d,
typename HashTable::index_type * offsets_d,
Expand Down
4 changes: 3 additions & 1 deletion benchmark/src/counting_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ int main(int argc, char* argv[])

const uint64_t max_keys = 1UL << 28;

const bool print_headers = true;

uint64_t dev_id = 0;
if(argc > 2) dev_id = std::atoi(argv[2]);
cudaSetDevice(dev_id); CUERR
Expand All @@ -97,7 +99,7 @@ int main(int argc, char* argv[])
using hash_table_t = warpcore::CountingHashTable<key_t, count_t>;

counting_benchmark<hash_table_t>(
keys_d, max_keys, {max_keys}, {0.9}, true);
keys_d, max_keys, {max_keys}, {0.9}, print_headers);

cudaFree(keys_d); CUERR
}
27 changes: 19 additions & 8 deletions benchmark/src/multi_bucket_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ void multi_value_benchmark(

const uint64_t max_unique_size = num_unique(keys_d, max_keys);

key_t* unique_keys_d = nullptr;
cudaMalloc(&unique_keys_d, sizeof(key_t)*max_unique_size); CUERR
key_t* query_keys_d = nullptr;
cudaMalloc(&query_keys_d, sizeof(key_t)*max_keys); CUERR
value_t* values_d = nullptr;
cudaMalloc(&values_d, sizeof(value_t)*max_keys); CUERR
index_t * offsets_d = nullptr;
Expand Down Expand Up @@ -75,9 +75,14 @@ void multi_value_benchmark(
// std::cerr << "keys in set: " << key_set.size() << '\n';

output.query_ms = benchmark_query_multi(
hash_table, unique_keys_d, offsets_d, values_d,
hash_table, query_keys_d, size,
offsets_d, values_d,
iters, thermal_backoff);

// output.query_ms = benchmark_query_unique(
// hash_table, query_keys_d, offsets_d, values_d,
// iters, thermal_backoff);

output.key_load_factor = hash_table.key_load_factor();
output.value_load_factor = hash_table.value_load_factor();
output.density = hash_table.storage_density();
Expand All @@ -91,7 +96,7 @@ void multi_value_benchmark(
}
}

cudaFree(unique_keys_d); CUERR
cudaFree(query_keys_d); CUERR
cudaFree(values_d); CUERR
cudaFree(offsets_d); CUERR
}
Expand All @@ -105,6 +110,8 @@ int main(int argc, char* argv[])

const uint64_t max_keys = 1UL << 27;

const bool print_headers = true;

uint64_t dev_id = 0;
if(argc > 2) dev_id = std::atoi(argv[2]);
cudaSetDevice(dev_id); CUERR
Expand Down Expand Up @@ -154,22 +161,26 @@ int main(int argc, char* argv[])
multi_value_benchmark<mb1_hash_table_t>(
keys_d, max_keys,
{max_keys},
{0.8});
{0.8},
print_headers);

multi_value_benchmark<mb2_hash_table_t>(
keys_d, max_keys,
{max_keys},
{0.8});
{0.8},
print_headers);

multi_value_benchmark<mb4_hash_table_t>(
keys_d, max_keys,
{max_keys},
{0.8});
{0.8},
print_headers);

multi_value_benchmark<mb8_hash_table_t>(
keys_d, max_keys,
{max_keys},
{0.8});
{0.8},
print_headers);

cudaFree(keys_d); CUERR
}
21 changes: 15 additions & 6 deletions benchmark/src/multi_value_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ void multi_value_benchmark(

const uint64_t max_unique_size = num_unique(keys_d, max_keys);

key_t* unique_keys_d = nullptr;
cudaMalloc(&unique_keys_d, sizeof(key_t)*max_unique_size); CUERR
key_t* query_keys_d = nullptr;
cudaMalloc(&query_keys_d, sizeof(key_t)*max_keys); CUERR
value_t* values_d = nullptr;
cudaMalloc(&values_d, sizeof(value_t)*max_keys); CUERR
index_t * offsets_d = nullptr;
Expand Down Expand Up @@ -65,9 +65,14 @@ void multi_value_benchmark(
iters, thermal_backoff);

output.query_ms = benchmark_query_multi(
hash_table, unique_keys_d, offsets_d, values_d,
hash_table, query_keys_d, size,
offsets_d, values_d,
iters, thermal_backoff);

// output.query_ms = benchmark_query_unique(
// hash_table, query_keys_d, offsets_d, values_d,
// iters, thermal_backoff);

output.key_load_factor = hash_table.load_factor();
output.density = output.key_load_factor;
output.status = hash_table.pop_status();
Expand All @@ -79,7 +84,7 @@ void multi_value_benchmark(
}
}

cudaFree(unique_keys_d); CUERR
cudaFree(query_keys_d); CUERR
cudaFree(values_d); CUERR
cudaFree(offsets_d); CUERR
}
Expand All @@ -93,6 +98,8 @@ int main(int argc, char* argv[])

const uint64_t max_keys = 1UL << 27;

const bool print_headers = true;

uint64_t dev_id = 0;
if(argc > 2) dev_id = std::atoi(argv[2]);
cudaSetDevice(dev_id); CUERR
Expand Down Expand Up @@ -123,12 +130,14 @@ int main(int argc, char* argv[])
multi_value_benchmark<mv_hash_table_t, 1>(
keys_d, max_keys,
{max_keys},
{0.8});
{0.8},
print_headers);

// multi_value_benchmark<mb_hash_table_t, mb_hash_table_t::bucket_size()>(
// keys_d, max_keys,
// {max_keys},
// {0.8});
// {0.8},
// print_headers);

cudaFree(keys_d); CUERR
}
4 changes: 3 additions & 1 deletion benchmark/src/single_value_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ int main(int argc, char* argv[])

const uint64_t max_keys = 1UL << 27;

const bool print_headers = true;

uint64_t dev_id = 0;
if(argc > 2) dev_id = std::atoi(argv[2]);
cudaSetDevice(dev_id); CUERR
Expand All @@ -101,7 +103,7 @@ int main(int argc, char* argv[])
storage::key_value::AoSStore<key_t, value_t>>;

single_value_benchmark<hash_table_t>(
keys_d, max_keys, {max_keys}, {0.8}, true);
keys_d, max_keys, {max_keys}, {0.8}, print_headers);

cudaFree(keys_d); CUERR
}

0 comments on commit db3a0b4

Please sign in to comment.