Skip to content

Commit

Permalink
Trying to reproduce #4
Browse files Browse the repository at this point in the history
  • Loading branch information
maawad committed Feb 7, 2023
1 parent f30e750 commit 8a8a042
Show file tree
Hide file tree
Showing 7 changed files with 121 additions and 13 deletions.
26 changes: 18 additions & 8 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,31 @@ RUN apt-get clean && apt-get update -y -qq
RUN apt-get install -y wget git build-essential

# Install conda
ENV PATH="/root/anaconda3/bin:${PATH}"
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh &&\
chmod +x Miniconda3-latest-Linux-x86_64.sh &&\
./Miniconda3-latest-Linux-x86_64.sh -b -p /root/anaconda3
#ENV PATH="/root/anaconda3/bin:${PATH}"
#RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh &&\
# chmod +x Miniconda3-latest-Linux-x86_64.sh &&\
# ./Miniconda3-latest-Linux-x86_64.sh -b -p /root/anaconda3

# Create conda environment with dependencies
COPY environment.yml .
RUN conda env create -f environment.yml
SHELL ["conda", "run", "-n", "mvgpubtree", "/bin/bash", "-c"]
#COPY environment.yml .
#RUN conda env create -f environment.yml
#SHELL ["conda", "run", "-n", "mvgpubtree", "/bin/bash", "-c"]

# Clone MVGpuBTree code from github
RUN git clone https://github.com/owensgroup/MVGpuBTree.git

#RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y hip-runtime-nvidia hip-dev

# Install CMake
ARG CMAKE_VERSION=3.18.0
RUN wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.sh &&\
chmod +x cmake-${CMAKE_VERSION}-linux-x86_64.sh &&\
mkdir /opt/cmake && \
./cmake-${CMAKE_VERSION}-linux-x86_64.sh --skip-license --prefix=/opt/cmake &&\
ln -s /opt/cmake/bin/* /usr/local/bin/

# Activate conda
ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "mvgpubtree"]
#ENTRYPOINT ["conda", "run", "--no-capture-output", "-n", "mvgpubtree"]

# Building the image:
# docker build -t mvgpubtree .
Expand Down
3 changes: 3 additions & 0 deletions docker/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash
# Builds the "mvgpubtree" image from docker/Dockerfile, using the current
# directory as the build context. The Dockerfile path is relative, so invoke
# this script from the directory that contains docker/.

docker build --file docker/Dockerfile --tag mvgpubtree .
4 changes: 4 additions & 0 deletions docker/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Starts an interactive, auto-removed container named "trees" from the
# mvgpubtree image with all GPUs exposed. The physical path of the current
# directory is bind-mounted at the same absolute path inside the container
# and used as the container's working directory.
git_directory=$(pwd -P)

# The path is quoted so that directories containing spaces or glob characters
# reach docker as a single argument.
docker run -it --rm --name trees --gpus all -v "$git_directory:$git_directory" -w "$git_directory" mvgpubtree /bin/bash
2 changes: 1 addition & 1 deletion include/device_bump_allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include <cstdint>

// 67108864 elements occupy 8 GiB when sizeof(T) = 128
template <class T, std::size_t MaxTCount = 67108864>
template <class T, std::size_t MaxTCount = 67108864 / 2>
struct device_bump_allocator {
using size_type = uint32_t;
using difference_type = uint32_t;
Expand Down
7 changes: 3 additions & 4 deletions include/gpu_blink_tree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
#include <device_bump_allocator.hpp>
#include <slab_alloc.hpp>

//#define DEBUG_LOCKS
// #define DEBUG_STRUCTURE
// #define DEBUG_LOCKS
// #define DEBUG_STRUCTURE

#ifdef DEBUG_STRUCTURE
#define DEBUG_STRUCTURE_PRINT(fmt, ...) \
Expand Down Expand Up @@ -1024,6 +1024,7 @@ struct gpu_blink_tree {
double tree_size_gbs = double(num_nodes) * sizeof(node_type<Key, Value, B>) / (1ull << 30);
return tree_size_gbs;
}
allocator_type allocator_;

private:
template <typename key_type,
Expand Down Expand Up @@ -1124,7 +1125,5 @@ struct gpu_blink_tree {
size_type* h_node_count_;

size_type* d_root_index_;

allocator_type allocator_;
};
} // namespace GpuBTree
7 changes: 7 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
project (test)

# Reproducer for issue #4: builds repro_4.cu into the repro_4 executable,
# places it in the "tests" folder, and links it against gpu_btrees.
add_executable(repro_4 repro_4.cu)
set_target_properties(repro_4 PROPERTIES
FOLDER "tests")
target_link_libraries(repro_4 PRIVATE gpu_btrees)



# concurrent insert range query
add_executable(concurrent_insert_rq_vtree concurrent_insert_rq_vtree.cu)
Expand Down
85 changes: 85 additions & 0 deletions test/repro_4.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#include <gpu_btree.h>
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <random>
#include <unordered_set>
#include <vector>

#include <cooperative_groups.h>
namespace cg = cooperative_groups;

// Inserts keys[0 .. keys_count) into `tree`, pairing each key with the global
// index of the thread that loaded it as its value.
//
// Threads are grouped into tiles of btree::branching_factor lanes. Every lane
// loads at most one key; the tile then drains those keys one at a time, with
// all lanes of the tile calling tree.cooperative_insert() together for each key.
//
// Template parameters:
//   key_type  - element type of `keys`.
//   size_type - integer type of `keys_count`.
//   btree     - tree type; must expose branching_factor, invalid_key,
//               invalid_value, device_allocator_context_type, an accessible
//               allocator_ member and cooperative_insert().
//
// Parameters:
//   keys       - device pointer to at least keys_count keys.
//   keys_count - number of keys to insert.
//   tree       - tree object, passed to the kernel by value.
//
// Launch layout: written for a 1D grid of 1D blocks (only the .x components are
// used) with at least keys_count threads in total.
template <typename key_type, typename size_type, typename btree>
__global__ void modified_insert_kernel(const key_type* keys,
                                       const size_type keys_count,
                                       btree tree) {
  auto thread_id = threadIdx.x + blockIdx.x * blockDim.x;
  auto block     = cg::this_thread_block();
  auto tile      = cg::tiled_partition<btree::branching_factor>(block);

  // thread_id - thread_rank() is the global index of the tile's first lane; if
  // even that lane is past the end of the input, the whole tile has no work.
  // Tiles that straddle the end stay alive so every lane can take part in the
  // cooperative calls below.
  if ((thread_id - tile.thread_rank()) >= keys_count) { return; }

  // Lanes without a key keep the sentinel pair and never raise to_insert.
  auto key       = btree::invalid_key;
  auto value     = btree::invalid_value;
  bool to_insert = false;
  if (thread_id < keys_count) {
    key       = keys[thread_id];
    value     = thread_id;
    to_insert = true;
  }

  // Per-tile allocator context built from the tree's allocator.
  using allocator_type = typename btree::device_allocator_context_type;
  allocator_type allocator{tree.allocator_, tile};

  // Bit i of work_queue is set while lane i still has a key pending.
  auto work_queue = tile.ballot(to_insert);
  while (work_queue) {
    // Serve the lowest-ranked pending lane: broadcast its pair to the tile.
    auto cur_rank  = __ffs(work_queue) - 1;
    auto cur_key   = tile.shfl(key, cur_rank);
    auto cur_value = tile.shfl(value, cur_rank);

    tree.cooperative_insert(cur_key, cur_value, tile, allocator);

    // Only the lane that owned this pair retires it from the queue.
    if (tile.thread_rank() == cur_rank) { to_insert = false; }
    work_queue = tile.ballot(to_insert);
  }
}

// Reproduction driver for issue #4.
//
// Generates 2^25 distinct random 32-bit keys (fixed seed 42), sorts them,
// uploads them to the GPU once, and then repeatedly builds a fresh
// gpu_blink_tree by launching modified_insert_kernel over the whole key set.
// Progress is printed before and after every round, so a round that never
// prints "done" identifies where the run stalled.
//
// Every CUDA runtime call and every kernel launch is checked; on the first
// failure the error string is written to stderr and the process exits with
// EXIT_FAILURE, so a faulting round cannot be mistaken for a successful one.
void investigate_tree_deadlock() {
  using key_type   = uint32_t;
  using value_type = uint32_t;

  // Reports a failed CUDA call and terminates the process.
  auto check_cuda = [](cudaError_t status, const char* what) {
    if (status != cudaSuccess) {
      std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
      std::exit(EXIT_FAILURE);
    }
  };

  size_t build_size        = size_t{1} << 25;
  const size_t num_rounds  = 10000;
  const unsigned block_dim = 512;

  key_type min_usable_key = 1;
  key_type max_usable_key = std::numeric_limits<key_type>::max() - 2;

  std::mt19937_64 gen(42);
  std::uniform_int_distribution<key_type> key_dist(min_usable_key, max_usable_key);
  std::vector<key_type> build_keys(build_size);
  std::unordered_set<key_type> build_keys_set;
  // Reserve up front to avoid repeated rehashing while filling the set; the
  // keys are sorted afterwards, so the set's iteration order does not matter.
  build_keys_set.reserve(build_size);
  std::cout << "Generating keys.." << std::endl;

  // Draw until the set holds build_size distinct keys (duplicates are dropped).
  while (build_keys_set.size() < build_size) {
    key_type key = key_dist(gen);
    build_keys_set.insert(key);
  }
  std::copy(build_keys_set.begin(), build_keys_set.end(), build_keys.begin());
  std::sort(build_keys.begin(), build_keys.end());

  key_type* keys_on_gpu = nullptr;
  check_cuda(cudaMalloc(&keys_on_gpu, build_size * sizeof(key_type)), "cudaMalloc(keys)");
  check_cuda(cudaMemcpy(keys_on_gpu,
                        build_keys.data(),
                        build_size * sizeof(key_type),
                        cudaMemcpyHostToDevice),
             "cudaMemcpy(keys, host to device)");

  for (size_t i = 0; i < num_rounds; ++i) {
    std::cout << "round " << i << " starting" << std::endl;

    // A new tree per round; it is handed to the kernel by value.
    GpuBTree::gpu_blink_tree<key_type, value_type, 16> tree;
    modified_insert_kernel<<<(build_size + block_dim - 1) / block_dim, block_dim>>>(
        keys_on_gpu, build_size, tree);
    // Launch-configuration errors surface here ...
    check_cuda(cudaGetLastError(), "modified_insert_kernel launch");
    // ... and faults raised while the kernel runs surface at this sync.
    check_cuda(cudaDeviceSynchronize(), "modified_insert_kernel execution");

    std::cout << "tree uses " << tree.compute_memory_usage() << " GB" << std::endl;
    std::cout << "round " << i << " done" << std::endl;
  }

  check_cuda(cudaFree(keys_on_gpu), "cudaFree(keys)");
}

// Program entry point: runs the issue #4 reproduction loop to completion.
int main() {
  investigate_tree_deadlock();
  // Falling off the end of main() yields exit status 0.
}

0 comments on commit 8a8a042

Please sign in to comment.