From daec371ec702152f727f88144c8201b8ca8469b8 Mon Sep 17 00:00:00 2001
From: Joongi Kim <joongi@an.kaist.ac.kr>
Date: Sun, 27 Dec 2015 16:51:59 +0900
Subject: [PATCH] refs #6: Now GPU version (IPv4) works.

---
 include/nba/element/element.hh       | 1 +
 include/nba/engines/cuda/mempool.hh  | 6 +++---
 include/nba/engines/dummy/mempool.hh | 2 +-
 include/nba/engines/phi/mempool.hh   | 4 ++--
 src/engines/cuda/computecontext.cc   | 2 +-
 src/engines/dummy/computecontext.cc  | 2 +-
 src/lib/io.cc                        | 2 +-
 src/lib/offloadtask.cc               | 4 ----
 8 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/include/nba/element/element.hh b/include/nba/element/element.hh
index bdd9644..cc6b1d3 100644
--- a/include/nba/element/element.hh
+++ b/include/nba/element/element.hh
@@ -225,6 +225,7 @@ public:
             offload_compute_handlers.insert({{"dummy", ch},});
         }
         finished_batches.init(MAX_FINBATCH_QLEN, -1, finished_batches_arrbuf);
+        memset(tasks, 0, sizeof(OffloadTask *) * NBA_MAX_COPROCESSOR_TYPES);
     }
     virtual ~OffloadableElement() {}
     int get_type() const { return ELEMTYPE_OFFLOADABLE | ELEMTYPE_SCHEDULABLE; }
diff --git a/include/nba/engines/cuda/mempool.hh b/include/nba/engines/cuda/mempool.hh
index b5e0073..46f5801 100644
--- a/include/nba/engines/cuda/mempool.hh
+++ b/include/nba/engines/cuda/mempool.hh
@@ -23,7 +23,7 @@ public:
 
     virtual bool init(size_t max_size)
     {
-        max_size = max_size;
+        this->max_size = max_size;
         cutilSafeCall(cudaMalloc((void **) &base, max_size));
         return true;
     }
@@ -66,7 +66,7 @@ public:
 
     virtual bool init(unsigned long size)
     {
-        max_size = size;
+        this->max_size = size;
         cutilSafeCall(cudaHostAlloc((void **) &base, size,
                       this->flags));
         return true;
@@ -74,7 +74,7 @@ public:
 
     bool init_with_flags(unsigned long size, int flags)
     {
-        max_size = size;
+        this->max_size = size;
         cutilSafeCall(cudaHostAlloc((void **) &base, size,
                       flags));
         return true;
diff --git a/include/nba/engines/dummy/mempool.hh b/include/nba/engines/dummy/mempool.hh
index 52db1d9..1d12093 100644
--- a/include/nba/engines/dummy/mempool.hh
+++ b/include/nba/engines/dummy/mempool.hh
@@ -21,7 +21,7 @@ public:
 
     virtual bool init(unsigned long size)
     {
-        max_size = size;
+        this->max_size = size;
         base = malloc(size);
         return true;
     }
diff --git a/include/nba/engines/phi/mempool.hh b/include/nba/engines/phi/mempool.hh
index 18a846e..fe2f505 100644
--- a/include/nba/engines/phi/mempool.hh
+++ b/include/nba/engines/phi/mempool.hh
@@ -25,7 +25,7 @@ public:
 
     virtual bool init(unsigned long max_size)
     {
-        max_size = max_size;
+        this->max_size = max_size;
         cl_int err_ret;
         clbuf = clCreateBuffer(clctx, CL_MEM_HOST_NO_ACCESS |
                           (direction_hint == HOST_TO_DEVICE ? CL_MEM_READ_ONLY : CL_MEM_WRITE_ONLY),
@@ -85,7 +85,7 @@ public:
     virtual bool init(size_t max_size)
     {
         void *ret = NULL;
-        max_size = max_size;
+        this->max_size = max_size;
         base = (uint8_t*) malloc(max_size);
         return ret;
     }
diff --git a/src/engines/cuda/computecontext.cc b/src/engines/cuda/computecontext.cc
index 233cac2..8e56401 100644
--- a/src/engines/cuda/computecontext.cc
+++ b/src/engines/cuda/computecontext.cc
@@ -76,7 +76,7 @@ size_t CUDAComputeContext::get_input_size(io_base_t io_base) const
 size_t CUDAComputeContext::get_output_size(io_base_t io_base) const
 {
     unsigned i = io_base;
-    return _cpu_mempool_in[i].get_alloc_size();
+    return _cpu_mempool_out[i].get_alloc_size();
 }
 
 int CUDAComputeContext::alloc_input_buffer(io_base_t io_base, size_t size, void **host_ptr, memory_t *dev_mem)
diff --git a/src/engines/dummy/computecontext.cc b/src/engines/dummy/computecontext.cc
index 05fd6a2..9936c6f 100644
--- a/src/engines/dummy/computecontext.cc
+++ b/src/engines/dummy/computecontext.cc
@@ -60,7 +60,7 @@ size_t DummyComputeContext::get_input_size(io_base_t io_base) const
 size_t DummyComputeContext::get_output_size(io_base_t io_base) const
 {
     unsigned i = io_base;
-    return _cpu_mempool_in[i].get_alloc_size();
+    return _cpu_mempool_out[i].get_alloc_size();
 }
 
 int DummyComputeContext::alloc_input_buffer(io_base_t io_base, size_t size, void **host_ptr, memory_t *dev_mem)
diff --git a/src/lib/io.cc b/src/lib/io.cc
index bf50650..d70d0b3 100644
--- a/src/lib/io.cc
+++ b/src/lib/io.cc
@@ -685,7 +685,7 @@ int io_loop(void *arg)
 
     snprintf(temp, RTE_MEMPOOL_NAMESIZE,
         "comp.dbstate.%u:%u@%u", ctx->loc.node_id, ctx->loc.local_thread_idx, ctx->loc.core_id);
-    size_t dbstate_pool_size = NBA_MAX_COPROC_PPDEPTH;
+    size_t dbstate_pool_size = NBA_MAX_COPROC_PPDEPTH * 16;
     size_t dbstate_item_size = sizeof(struct datablock_tracker) * NBA_MAX_DATABLOCKS;
     ctx->comp_ctx->dbstate_pool = rte_mempool_create(temp, dbstate_pool_size + 1,
                                                      dbstate_item_size, 32,
diff --git a/src/lib/offloadtask.cc b/src/lib/offloadtask.cc
index e5393a4..ee90a8d 100644
--- a/src/lib/offloadtask.cc
+++ b/src/lib/offloadtask.cc
@@ -100,12 +100,9 @@ void OffloadTask::prepare_read_buffer()
                 for (PacketBatch *batch : batches) {
                     struct datablock_tracker *t = &batch->datablock_states[dbid];
                     tie(t->in_size, t->in_count) = db->calc_read_buffer_size(batch);
-                    t->host_in_ptr    = nullptr;
-                    t->dev_in_ptr.ptr = nullptr;
                     if (t->in_size > 0 && t->in_count > 0) {
                         cctx->alloc_input_buffer(io_base, t->in_size,
                                                  (void **) &t->host_in_ptr, &t->dev_in_ptr);
-                        assert(t->host_in_ptr != nullptr);
                         db->preprocess(batch, t->host_in_ptr);
                     }
                 }
@@ -148,7 +145,6 @@ void OffloadTask::prepare_write_buffer()
                             cctx->alloc_output_buffer(io_base, t->out_size,
                                                       (void **) &t->host_out_ptr,
                                                       &t->dev_out_ptr);
-                            assert(t->host_out_ptr != nullptr);
                         }
                     }
                 }