From e5c380bd27e2166a63fbf9633f64a5ff680aadda Mon Sep 17 00:00:00 2001
From: StrikeW <wangsiyuanse@gmail.com>
Date: Mon, 28 Oct 2019 14:00:24 +0800
Subject: [PATCH] fix #11: wukong+g compile errors

---
 core/config.hpp              |  5 ++++
 core/global.hpp              | 56 ++++++++++++++++++------------------
 core/gpu/gpu_agent.hpp       |  2 +-
 core/gpu/gpu_cache.hpp       |  3 +-
 core/gpu/gpu_engine.hpp      |  1 -
 core/gpu/gpu_engine_cuda.hpp |  9 +++---
 core/gpu/gpu_hash.hpp        |  1 +
 core/gpu/gpu_mem.hpp         |  2 +-
 core/store/gstore.hpp        | 20 ++++++++++---
 core/store/meta.hpp          | 43 +++++++++++++++++++++------
 utils/gpu.hpp                |  2 --
 11 files changed, 94 insertions(+), 50 deletions(-)

diff --git a/core/config.hpp b/core/config.hpp
index 669dd09..3c339db 100644
--- a/core/config.hpp
+++ b/core/config.hpp
@@ -223,6 +223,11 @@ void load_config(string fname, int nsrvs)
 #ifdef USE_GPU
     // each GPU card needs one (dedicated) agent thread
     Global::num_threads += Global::num_gpus;
+    if (Global::num_gpus != 1) {
+        logstream(LOG_ERROR) << "Wrong config: please config num_gpus with 1 to enable GPU extension."
+                             << LOG_endl;
+        exit(-1);
+    }
 #endif
 
     // limited the number of engines
diff --git a/core/global.hpp b/core/global.hpp
index e58e80b..1af8e84 100644
--- a/core/global.hpp
+++ b/core/global.hpp
@@ -31,46 +31,46 @@ class Global {
     // another choice
     // e.g., static int &num_threads() { static int _num_threads = 2; return _num_threads; }
 
-    static int num_servers;
-    static int num_threads;
+    static int num_servers __attribute__((weak));
+    static int num_threads __attribute__((weak));
 
-    static int num_proxies;
-    static int num_engines;
+    static int num_proxies __attribute__((weak));
+    static int num_engines __attribute__((weak));
 
-    static string input_folder;
+    static string input_folder __attribute__((weak));
 
-    static int data_port_base;
-    static int ctrl_port_base;
+    static int data_port_base __attribute__((weak));
+    static int ctrl_port_base __attribute__((weak));
 
-    static int rdma_buf_size_mb;
-    static int rdma_rbf_size_mb;
+    static int rdma_buf_size_mb __attribute__((weak));
+    static int rdma_rbf_size_mb __attribute__((weak));
 
-    static bool use_rdma;
-    static int rdma_threshold;
+    static bool use_rdma __attribute__((weak));
+    static int rdma_threshold __attribute__((weak));
 
-    static int mt_threshold;
+    static int mt_threshold __attribute__((weak));
 
-    static bool enable_caching;
-    static bool enable_workstealing;
-    static int stealing_pattern;
+    static bool enable_caching __attribute__((weak));
+    static bool enable_workstealing __attribute__((weak));
+    static int stealing_pattern __attribute__((weak));
 
-    static bool silent;
+    static bool silent __attribute__((weak));
 
-    static bool enable_planner;
-    static bool generate_statistics;
+    static bool enable_planner __attribute__((weak));
+    static bool generate_statistics __attribute__((weak));
 
-    static bool enable_vattr;
+    static bool enable_vattr __attribute__((weak));
 
-    static int memstore_size_gb;
-    static int est_load_factor;
+    static int memstore_size_gb __attribute__((weak));
+    static int est_load_factor __attribute__((weak));
 
-    static int num_gpus;
-    static int gpu_kvcache_size_gb;
-    static int gpu_rbuf_size_mb;
-    static int gpu_rdma_buf_size_mb;
-    static int gpu_key_blk_size_mb;
-    static int gpu_value_blk_size_mb;
-    static bool gpu_enable_pipeline;
+    static int num_gpus __attribute__((weak));
+    static int gpu_kvcache_size_gb __attribute__((weak));
+    static int gpu_rbuf_size_mb __attribute__((weak));
+    static int gpu_rdma_buf_size_mb __attribute__((weak));
+    static int gpu_key_blk_size_mb __attribute__((weak));
+    static int gpu_value_blk_size_mb __attribute__((weak));
+    static bool gpu_enable_pipeline __attribute__((weak));
 };
 
 
diff --git a/core/gpu/gpu_agent.hpp b/core/gpu/gpu_agent.hpp
index 549b72e..69a0be2 100644
--- a/core/gpu/gpu_agent.hpp
+++ b/core/gpu/gpu_agent.hpp
@@ -152,7 +152,7 @@ class GPUAgent {
     // fork-join or in-place execution
     bool need_fork_join(SPARQLQuery &req) {
         // always need NOT fork-join when executing on single machine
-        if (Global::num_serverss == 1) return false;
+        if (Global::num_servers == 1) return false;
 
         // always need fork-join mode w/o RDMA
         if (!Global::use_rdma) return true;
diff --git a/core/gpu/gpu_cache.hpp b/core/gpu/gpu_cache.hpp
index 48dc98f..6b18f5e 100644
--- a/core/gpu/gpu_cache.hpp
+++ b/core/gpu/gpu_cache.hpp
@@ -36,6 +36,7 @@
 // utils
 #include "unit.hpp"
 #include "gpu.hpp"
+#include "global.hpp"
 
 using namespace std;
 
@@ -287,7 +288,7 @@ class GPUCache {
             // step 4.2 traverse the ext_bucket_list and load
             uint64_t passed_buckets = 0;
 
-            for (int i = 0; i < rdf_metas[seg].ext_bucket_list.size(); i++) {
+            for (int i = 0; i < rdf_metas[seg].get_ext_bucket_list_size(); i++) {
                 ext_bucket_extent_t ext = rdf_metas[seg].ext_bucket_list[i];
                 /* load from this ext
                  * inside_off: the offset inside the ext
diff --git a/core/gpu/gpu_engine.hpp b/core/gpu/gpu_engine.hpp
index 0d66559..2f56db5 100644
--- a/core/gpu/gpu_engine.hpp
+++ b/core/gpu/gpu_engine.hpp
@@ -28,7 +28,6 @@
 #include <boost/unordered_map.hpp>
 #include <vector>
 
-#include "config.hpp"
 #include "type.hpp"
 #include "dgraph.hpp"
 #include "query.hpp"
diff --git a/core/gpu/gpu_engine_cuda.hpp b/core/gpu/gpu_engine_cuda.hpp
index f38cb8b..aca5d34 100644
--- a/core/gpu/gpu_engine_cuda.hpp
+++ b/core/gpu/gpu_engine_cuda.hpp
@@ -27,6 +27,7 @@
 #include <vector>
 #include <utility>
 
+#include "global.hpp"
 #include "assertion.hpp"
 #include "query.hpp"
 
@@ -126,7 +127,7 @@ class GPUEngineCuda final {
         param.query.var2col_start = req.result.var2col(start);
 
         logstream(LOG_DEBUG) << "known_to_unknown: #ext_buckets: "
-                             << seg_meta.ext_bucket_list.size() << LOG_endl;
+                             << seg_meta.get_ext_bucket_list_size() << LOG_endl;
 
         ASSERT(gmem->res_inbuf() != gmem->res_outbuf());
         ASSERT(nullptr != gmem->res_inbuf());
@@ -139,7 +140,7 @@ class GPUEngineCuda final {
 
 
         // prefetch segment of next pattern
-        if (global_gpu_enable_pipeline && has_next_pattern(req)) {
+        if (Global::gpu_enable_pipeline && has_next_pattern(req)) {
             auto next_seg = pattern_to_segid(req, req.pattern_step + 1);
             auto stream2 = stream_pool->get_stream(next_seg.pid);
 
@@ -227,7 +228,7 @@ class GPUEngineCuda final {
 
 
         // preload next predicate
-        if (global_gpu_enable_pipeline && has_next_pattern(req)) {
+        if (Global::gpu_enable_pipeline && has_next_pattern(req)) {
             auto next_seg = pattern_to_segid(req, req.pattern_step + 1);
             auto stream2 = stream_pool->get_stream(next_seg.pid);
 
@@ -308,7 +309,7 @@ class GPUEngineCuda final {
 
 
         // preload next predicate
-        if (global_gpu_enable_pipeline && has_next_pattern(req)) {
+        if (Global::gpu_enable_pipeline && has_next_pattern(req)) {
             auto next_seg = pattern_to_segid(req, req.pattern_step + 1);
             auto stream2 = stream_pool->get_stream(next_seg.pid);
 
diff --git a/core/gpu/gpu_hash.hpp b/core/gpu/gpu_hash.hpp
index 560396e..31b785d 100644
--- a/core/gpu/gpu_hash.hpp
+++ b/core/gpu/gpu_hash.hpp
@@ -34,6 +34,7 @@
 #include "store/vertex.hpp"
 
 // utils
+#include "global.hpp"
 #include "gpu.hpp"
 #include "unit.hpp"
 
diff --git a/core/gpu/gpu_mem.hpp b/core/gpu/gpu_mem.hpp
index d9d4be1..2efadb6 100644
--- a/core/gpu/gpu_mem.hpp
+++ b/core/gpu/gpu_mem.hpp
@@ -24,13 +24,13 @@
 
 #ifdef USE_GPU
 
-#include "global.hpp"
 #include "rdma.hpp"
 #include "type.hpp"
 
 // utils
 #include "unit.hpp"
 #include "gpu.hpp"
+#include "global.hpp"
 
 class GPUMem {
 private:
diff --git a/core/store/gstore.hpp b/core/store/gstore.hpp
index 12df064..39da402 100644
--- a/core/store/gstore.hpp
+++ b/core/store/gstore.hpp
@@ -460,8 +460,14 @@ class GStore {
         // allocate buckets in indirect-header region to segments
         // #buckets : #extended buckets = 1 : 0.15
         if (seg.num_buckets > 0) {
-            uint64_t start_off = alloc_ext_buckets(EXT_BUCKET_EXTENT_LEN);
-            seg.add_ext_buckets(ext_bucket_extent_t(EXT_BUCKET_EXTENT_LEN, start_off));
+            uint64_t nbuckets = 0;
+#ifdef USE_GPU
+            nbuckets = EXT_BUCKET_EXTENT_LEN(seg.num_buckets);
+#else
+            nbuckets = EXT_BUCKET_EXTENT_LEN;
+#endif
+            uint64_t start_off = alloc_ext_buckets(nbuckets);
+            seg.add_ext_buckets(ext_bucket_extent_t(nbuckets, start_off));
         }
     }
 
@@ -826,8 +832,14 @@ class GStore {
             rdf_seg_meta_t &seg = rdf_seg_meta_map[segid_t(key)];
             uint64_t ext_bucket_id = seg.get_ext_bucket();
             if (ext_bucket_id == 0) {
-                uint64_t start_off = alloc_ext_buckets(EXT_BUCKET_EXTENT_LEN);
-                seg.add_ext_buckets(ext_bucket_extent_t(EXT_BUCKET_EXTENT_LEN, start_off));
+                uint64_t nbuckets = 0;
+#ifdef USE_GPU
+                nbuckets = EXT_BUCKET_EXTENT_LEN(seg.num_buckets);
+#else
+                nbuckets = EXT_BUCKET_EXTENT_LEN;
+#endif
+                uint64_t start_off = alloc_ext_buckets(nbuckets);
+                seg.add_ext_buckets(ext_bucket_extent_t(nbuckets, start_off));
                 ext_bucket_id = seg.get_ext_bucket();
             }
             pthread_spin_unlock(&seg_ext_locks[seg_ext_lock_id]);
diff --git a/core/store/meta.hpp b/core/store/meta.hpp
index 3219fb5..1e27fd1 100644
--- a/core/store/meta.hpp
+++ b/core/store/meta.hpp
@@ -35,8 +35,12 @@
 using namespace std;
 using namespace boost::archive;
 
-
+#ifdef USE_GPU
+#define EXT_BUCKET_LIST_CAPACITY 1
+#define EXT_BUCKET_EXTENT_LEN(num_buckets) ((num_buckets) * 15 / 100 + 1)  // 15% of main buckets, at least 1
+#else
 #define EXT_BUCKET_EXTENT_LEN 256
+#endif
 #define PREDICATE_NSEGS 2
 #ifdef VERSATILE
 #define INDEX_NSEGS 4   // index(2) + vid's all preds(2)
@@ -75,15 +79,39 @@ struct rdf_seg_meta_t {
     uint64_t num_keys = 0;      // #keys of the segment
     uint64_t num_buckets = 0;   // allocated main headers (hash space)
     uint64_t bucket_start = 0;  // start offset of main-header region of gstore
-    vector<ext_bucket_extent_t> ext_bucket_list;
     uint64_t num_edges = 0;     // #edges of the segment
     uint64_t edge_start = 0;    // start offset in the entry region of gstore
 
     int num_key_blks = 0;       // #key-blocks needed in gcache
     int num_value_blks = 0;     // #value-blocks needed in gcache
 
+#ifdef USE_GPU
+    ext_bucket_extent_t ext_bucket_list[EXT_BUCKET_LIST_CAPACITY];
+    size_t ext_bucket_list_sz = 0;
+
+    rdf_seg_meta_t() {  // GPU build: start with an all-zero fixed-size extent array
+        memset(&ext_bucket_list, 0, sizeof(ext_bucket_list));
+    }
+
+    size_t get_ext_bucket_list_size() const { return ext_bucket_list_sz; }  // #extents added so far
+
+    void add_ext_buckets(const ext_bucket_extent_t &ext) {  // append ext to the fixed-size extent array
+        assert(ext_bucket_list_sz < EXT_BUCKET_LIST_CAPACITY);  // NOTE(review): only guard; no bounds check if assert is compiled out (NDEBUG)
+        ext_bucket_list[ext_bucket_list_sz++] = ext;
+    }
+#else
+    vector<ext_bucket_extent_t> ext_bucket_list;
+
+    size_t get_ext_bucket_list_size() const { return ext_bucket_list.size(); }  // #extents held in the vector
+
+    void add_ext_buckets(const ext_bucket_extent_t &ext) {  // append ext to the growable extent list
+        ext_bucket_list.push_back(ext);
+    }
+
+#endif
+
     uint64_t get_ext_bucket() {
-        for (int i = 0; i < ext_bucket_list.size(); ++i) {
+        for (int i = 0; i < get_ext_bucket_list_size(); ++i) {
             ext_bucket_extent_t &ext = ext_bucket_list[i];
             if (ext.off < ext.num_ext_buckets) {
                 return ext.start + ext.off++;
@@ -92,13 +120,9 @@ struct rdf_seg_meta_t {
         return 0;
     }
 
-    void add_ext_buckets(const ext_bucket_extent_t &ext) {
-        ext_bucket_list.push_back(ext);
-    }
-
     inline uint64_t get_total_num_buckets() const {
         uint64_t total = num_buckets;
-        for (int i = 0; i < ext_bucket_list.size(); ++i) {
+        for (int i = 0; i < get_ext_bucket_list_size(); ++i) {
             total += ext_bucket_list[i].num_ext_buckets;
         }
         return total;
@@ -109,6 +133,9 @@ struct rdf_seg_meta_t {
         ar & num_buckets;
         ar & bucket_start;
         ar & ext_bucket_list;
+#ifdef USE_GPU
+        ar & ext_bucket_list_sz;
+#endif
         ar & num_edges;
         ar & edge_start;
     }
diff --git a/utils/gpu.hpp b/utils/gpu.hpp
index 596f9f0..00920fa 100644
--- a/utils/gpu.hpp
+++ b/utils/gpu.hpp
@@ -22,8 +22,6 @@
 
 #pragma once
 
-#include "global.hpp"
-
 #ifdef USE_GPU
 #include <cuda_runtime.h>