From 59ef7a12cb720f43e4448511a8b85652afc14e52 Mon Sep 17 00:00:00 2001
From: "sungjun.choi" <sungjun.choi@navercorp.com>
Date: Wed, 7 Sep 2016 11:15:34 +0900
Subject: [PATCH 1/8] Add GpuMat preprocessing path to ssd_detect

---
 Makefile                    |   2 +-
 examples/ssd/ssd_detect.cpp | 165 +++++++++++++++++++++++++++---------
 2 files changed, 126 insertions(+), 41 deletions(-)
diff --git a/Makefile b/Makefile
index 53826636cac..e2ce9405425 100644
--- a/Makefile
+++ b/Makefile
@@ -192,7 +192,7 @@ ifeq ($(USE_LMDB), 1)
 	LIBRARIES += lmdb
 endif
 ifeq ($(USE_OPENCV), 1)
-	LIBRARIES += opencv_core opencv_highgui opencv_imgproc
+	LIBRARIES += opencv_core opencv_highgui opencv_imgproc opencv_gpu
 
 	ifeq ($(OPENCV_VERSION), 3)
 		LIBRARIES += opencv_imgcodecs opencv_videoio
diff --git a/examples/ssd/ssd_detect.cpp b/examples/ssd/ssd_detect.cpp
index 3aadc9c79f4..6b5ccca1475 100644
--- a/examples/ssd/ssd_detect.cpp
+++ b/examples/ssd/ssd_detect.cpp
@@ -12,9 +12,13 @@
 //    folder/video1.mp4
 //    folder/video2.mp4
 //
+
+#define USE_OPENCV_GPU 1
+
 #include <caffe/caffe.hpp>
 #ifdef USE_OPENCV
 #include <opencv2/core/core.hpp>
+#include <opencv2/gpu/gpu.hpp>
 #include <opencv2/highgui/highgui.hpp>
 #include <opencv2/imgproc/imgproc.hpp>
 #endif  // USE_OPENCV
@@ -28,6 +32,7 @@
 
 #ifdef USE_OPENCV
 using namespace caffe;  // NOLINT(build/namespaces)
+using namespace cv;
 
 class Detector {
  public:
@@ -36,21 +41,33 @@ class Detector {
            const string& mean_file,
            const string& mean_value);
 
-  std::vector<vector<float> > Detect(const cv::Mat& img);
+  std::vector<vector<float> > Detect(const Mat& img);
 
  private:
   void SetMean(const string& mean_file, const string& mean_value);
 
-  void WrapInputLayer(std::vector<cv::Mat>* input_channels);
+  void WrapInputLayer(std::vector<Mat>* input_channels);
+  void WrapInputLayer(std::vector<gpu::GpuMat>* input_channels);
 
-  void Preprocess(const cv::Mat& img,
-                  std::vector<cv::Mat>* input_channels);
+  void Preprocess(const Mat& img,
+                  std::vector<Mat>* input_channels);
+  void Preprocess(const Mat& img,
+                    std::vector<gpu::GpuMat>* input_channels);
 
  private:
   shared_ptr<Net<float> > net_;
-  cv::Size input_geometry_;
+  Size input_geometry_;
   int num_channels_;
-  cv::Mat mean_;
+  Mat mean_;
+  
+  std::vector<gpu::GpuMat> input_channels_gpu;
+  
+  gpu::GpuMat mean_gpu;
+  gpu::GpuMat g_img;
+  gpu::GpuMat sample;
+  gpu::GpuMat sample_resized;  
+  gpu::GpuMat sample_float;
+  gpu::GpuMat sample_normalized;
 };
 
 Detector::Detector(const string& model_file,
@@ -74,24 +91,28 @@ Detector::Detector(const string& model_file,
   num_channels_ = input_layer->channels();
   CHECK(num_channels_ == 3 || num_channels_ == 1)
     << "Input layer should have 1 or 3 channels.";
-  input_geometry_ = cv::Size(input_layer->width(), input_layer->height());
+  input_geometry_ = Size(input_layer->width(), input_layer->height());
 
   /* Load the binaryproto mean file. */
   SetMean(mean_file, mean_value);
 }
 
-std::vector<vector<float> > Detector::Detect(const cv::Mat& img) {
+std::vector<vector<float> > Detector::Detect(const Mat& img) {
   Blob<float>* input_layer = net_->input_blobs()[0];
   input_layer->Reshape(1, num_channels_,
                        input_geometry_.height, input_geometry_.width);
   /* Forward dimension change to all layers. */
   net_->Reshape();
 
-  std::vector<cv::Mat> input_channels;
+#if CPU_ONLY || !USE_OPENCV_GPU
+  std::vector<Mat> input_channels;
   WrapInputLayer(&input_channels);
-
   Preprocess(img, &input_channels);
-
+#else
+  WrapInputLayer(&input_channels_gpu);
+  Preprocess(img, &input_channels_gpu);
+#endif
+  
   net_->Forward();
 
   /* Copy the output layer to a std::vector */
@@ -114,7 +135,7 @@ std::vector<vector<float> > Detector::Detect(const cv::Mat& img) {
 
 /* Load the mean file in binaryproto format. */
 void Detector::SetMean(const string& mean_file, const string& mean_value) {
-  cv::Scalar channel_mean;
+  Scalar channel_mean;
   if (!mean_file.empty()) {
     CHECK(mean_value.empty()) <<
       "Cannot specify mean_file and mean_value at the same time";
@@ -128,23 +149,23 @@ void Detector::SetMean(const string& mean_file, const string& mean_value) {
       << "Number of channels of mean file doesn't match input layer.";
 
     /* The format of the mean file is planar 32-bit float BGR or grayscale. */
-    std::vector<cv::Mat> channels;
+    std::vector<Mat> channels;
     float* data = mean_blob.mutable_cpu_data();
     for (int i = 0; i < num_channels_; ++i) {
       /* Extract an individual channel. */
-      cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
+      Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
       channels.push_back(channel);
       data += mean_blob.height() * mean_blob.width();
     }
 
     /* Merge the separate channels into a single image. */
-    cv::Mat mean;
-    cv::merge(channels, mean);
+    Mat mean;
+    merge(channels, mean);
 
     /* Compute the global mean pixel value and create a mean image
      * filled with this value. */
     channel_mean = cv::mean(mean);
-    mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean);
+    mean_ = Mat(input_geometry_, mean.type(), channel_mean);
   }
   if (!mean_value.empty()) {
     CHECK(mean_file.empty()) <<
@@ -159,15 +180,16 @@ void Detector::SetMean(const string& mean_file, const string& mean_value) {
     CHECK(values.size() == 1 || values.size() == num_channels_) <<
       "Specify either 1 mean_value or as many as channels: " << num_channels_;
 
-    std::vector<cv::Mat> channels;
+    std::vector<Mat> channels;
     for (int i = 0; i < num_channels_; ++i) {
       /* Extract an individual channel. */
-      cv::Mat channel(input_geometry_.height, input_geometry_.width, CV_32FC1,
-          cv::Scalar(values[i]));
+      Mat channel(input_geometry_.height, input_geometry_.width, CV_32FC1,
+          Scalar(values[i]));
       channels.push_back(channel);
     }
-    cv::merge(channels, mean_);
+    merge(channels, mean_);
   }
+  mean_gpu.upload(mean_);
 }
 
 /* Wrap the input layer of the network in separate cv::Mat objects
@@ -175,59 +197,112 @@ void Detector::SetMean(const string& mean_file, const string& mean_value) {
  * don't need to rely on cudaMemcpy2D. The last preprocessing
  * operation will write the separate channels directly to the input
  * layer. */
-void Detector::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
+void Detector::WrapInputLayer(std::vector<Mat>* input_channels) {
   Blob<float>* input_layer = net_->input_blobs()[0];
 
   int width = input_layer->width();
   int height = input_layer->height();
   float* input_data = input_layer->mutable_cpu_data();
   for (int i = 0; i < input_layer->channels(); ++i) {
-    cv::Mat channel(height, width, CV_32FC1, input_data);
+    Mat channel(height, width, CV_32FC1, input_data);
     input_channels->push_back(channel);
     input_data += width * height;
   }
 }
 
-void Detector::Preprocess(const cv::Mat& img,
-                            std::vector<cv::Mat>* input_channels) {
+void Detector::WrapInputLayer(std::vector<gpu::GpuMat>* input_channels) {
+  Blob<float>* input_layer = net_->input_blobs()[0];
+
+  int width = input_layer->width();
+  int height = input_layer->height();
+  float* input_data = input_layer->mutable_gpu_data();
+  for (int i = 0; i < input_layer->channels(); ++i) {
+    gpu::GpuMat channel(height, width, CV_32FC1, input_data);
+    input_channels->push_back(channel);
+    input_data += width * height;
+  }
+}
+
+void Detector::Preprocess(const Mat& img,
+                            std::vector<Mat>* input_channels) {
   /* Convert the input image to the input image format of the network. */
-  cv::Mat sample;
+  Mat sample;
   if (img.channels() == 3 && num_channels_ == 1)
-    cv::cvtColor(img, sample, cv::COLOR_BGR2GRAY);
+    cvtColor(img, sample, COLOR_BGR2GRAY);
   else if (img.channels() == 4 && num_channels_ == 1)
-    cv::cvtColor(img, sample, cv::COLOR_BGRA2GRAY);
+    cvtColor(img, sample, COLOR_BGRA2GRAY);
   else if (img.channels() == 4 && num_channels_ == 3)
-    cv::cvtColor(img, sample, cv::COLOR_BGRA2BGR);
+    cvtColor(img, sample, COLOR_BGRA2BGR);
   else if (img.channels() == 1 && num_channels_ == 3)
-    cv::cvtColor(img, sample, cv::COLOR_GRAY2BGR);
+    cvtColor(img, sample, COLOR_GRAY2BGR);
   else
     sample = img;
 
-  cv::Mat sample_resized;
+  Mat sample_resized;
   if (sample.size() != input_geometry_)
-    cv::resize(sample, sample_resized, input_geometry_);
+    resize(sample, sample_resized, input_geometry_);
   else
     sample_resized = sample;
 
-  cv::Mat sample_float;
+  Mat sample_float;
   if (num_channels_ == 3)
     sample_resized.convertTo(sample_float, CV_32FC3);
   else
     sample_resized.convertTo(sample_float, CV_32FC1);
 
-  cv::Mat sample_normalized;
-  cv::subtract(sample_float, mean_, sample_normalized);
+  Mat sample_normalized;
+  subtract(sample_float, mean_, sample_normalized);
 
   /* This operation will write the separate BGR planes directly to the
-   * input layer of the network because it is wrapped by the cv::Mat
+   * input layer of the network because it is wrapped by the Mat
    * objects in input_channels. */
-  cv::split(sample_normalized, *input_channels);
+  split(sample_normalized, *input_channels);
 
   CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
         == net_->input_blobs()[0]->cpu_data())
     << "Input channels are not wrapping the input layer of the network.";
 }
 
+void Detector::Preprocess(const Mat& img,
+                            std::vector<gpu::GpuMat>* input_channels) {
+  /* Convert the input image to the input image format of the network. */
+  if(img.channels() == num_channels_)
+	  sample.upload(img);
+  else
+  { 
+	  g_img.upload(img);
+  	  if (g_img.channels() == 3 && num_channels_ == 1)
+		cvtColor(g_img, sample, COLOR_BGR2GRAY);
+	  else if (g_img.channels() == 4 && num_channels_ == 1)
+		cvtColor(g_img, sample, COLOR_BGRA2GRAY);
+	  else if (g_img.channels() == 4 && num_channels_ == 3)
+		cvtColor(g_img, sample, COLOR_BGRA2BGR);
+	  else if (g_img.channels() == 1 && num_channels_ == 3)
+		cvtColor(g_img, sample, COLOR_GRAY2BGR);
+  }
+
+  if (sample.size() != input_geometry_)
+    resize(sample, sample_resized, input_geometry_);
+  else
+    sample_resized = sample;
+
+  if (num_channels_ == 3)
+    sample_resized.convertTo(sample_float, CV_32FC3);
+  else
+    sample_resized.convertTo(sample_float, CV_32FC1);
+
+  subtract(sample_float, mean_gpu, sample_normalized);
+  
+  /* This operation will write the separate BGR planes directly to the
+   * input layer of the network because it is wrapped by the Mat
+   * objects in input_channels. */
+  split(sample_normalized, *input_channels);
+
+  CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
+        == net_->input_blobs()[0]->gpu_data())
+    << "Input channels are not wrapping the input layer of the network.";
+}
+
 DEFINE_string(mean_file, "",
     "The mean file used to subtract from the input image.");
 DEFINE_string(mean_value, "104,117,123",
@@ -285,9 +360,15 @@ int main(int argc, char** argv) {
   // Process image one by one.
   std::ifstream infile(argv[3]);
   std::string file;
+  //Time Check
+  struct timeval start_point, end_point;
+  double operating_time;
+  gettimeofday(&start_point, NULL);  
+  int iFrame = 0;
+
   while (infile >> file) {
     if (file_type == "image") {
-      cv::Mat img = cv::imread(file, -1);
+      Mat img = imread(file, -1);
       CHECK(!img.empty()) << "Unable to decode image " << file;
       std::vector<vector<float> > detections = detector.Detect(img);
 
@@ -307,12 +388,13 @@ int main(int argc, char** argv) {
           out << static_cast<int>(d[6] * img.rows) << std::endl;
         }
       }
+      iFrame++;
     } else if (file_type == "video") {
-      cv::VideoCapture cap(file);
+      VideoCapture cap(file);
       if (!cap.isOpened()) {
         LOG(FATAL) << "Failed to open video: " << file;
       }
-      cv::Mat img;
+      Mat img;
       int frame_count = 0;
       while (true) {
         bool success = cap.read(img);
@@ -349,6 +431,9 @@ int main(int argc, char** argv) {
       LOG(FATAL) << "Unknown file_type: " << file_type;
     }
   }
+  gettimeofday(&end_point, NULL); 
+  operating_time = (double)(end_point.tv_sec)+(double)(end_point.tv_usec)/1000000.0-(double)(start_point.tv_sec)-(double)(start_point.tv_usec)/1000000.0;
+  printf("%d Frame, %f, %f fps\n", iFrame, operating_time, (float)iFrame / operating_time );
   return 0;
 }
 #else

From d81550b1d8f154a760c9dce079fe8d28e5f04a2b Mon Sep 17 00:00:00 2001
From: "sungjun.choi" <sungjun.choi@navercorp.com>
Date: Wed, 7 Sep 2016 14:27:40 +0900
Subject: [PATCH 2/8] Fix ssd_detect GpuMat errors: use explicit cv:: and cv::gpu:: names

---
 examples/ssd/ssd_detect.cpp | 129 ++++++++++++++++++------------------
 1 file changed, 64 insertions(+), 65 deletions(-)

diff --git a/examples/ssd/ssd_detect.cpp b/examples/ssd/ssd_detect.cpp
index 6b5ccca1475..77a63ebe54d 100644
--- a/examples/ssd/ssd_detect.cpp
+++ b/examples/ssd/ssd_detect.cpp
@@ -32,7 +32,6 @@
 
 #ifdef USE_OPENCV
 using namespace caffe;  // NOLINT(build/namespaces)
-using namespace cv;
 
 class Detector {
  public:
@@ -41,33 +40,31 @@ class Detector {
            const string& mean_file,
            const string& mean_value);
 
-  std::vector<vector<float> > Detect(const Mat& img);
+  std::vector<vector<float> > Detect(const cv::Mat& img);
 
  private:
   void SetMean(const string& mean_file, const string& mean_value);
 
-  void WrapInputLayer(std::vector<Mat>* input_channels);
-  void WrapInputLayer(std::vector<gpu::GpuMat>* input_channels);
-
-  void Preprocess(const Mat& img,
-                  std::vector<Mat>* input_channels);
-  void Preprocess(const Mat& img,
-                    std::vector<gpu::GpuMat>* input_channels);
-
+  void WrapInputLayer(std::vector<cv::Mat>* input_channels);
+  void WrapInputLayer(std::vector<cv::gpu::GpuMat>* input_channels);
+  void Preprocess(const cv::Mat& img,
+                  std::vector<cv::Mat>* input_channels);
+  void Preprocess(const cv::Mat& img,
+                  std::vector<cv::gpu::GpuMat>* input_channels);
  private:
   shared_ptr<Net<float> > net_;
-  Size input_geometry_;
+  cv::Size input_geometry_;
   int num_channels_;
-  Mat mean_;
-  
-  std::vector<gpu::GpuMat> input_channels_gpu;
-  
-  gpu::GpuMat mean_gpu;
-  gpu::GpuMat g_img;
-  gpu::GpuMat sample;
-  gpu::GpuMat sample_resized;  
-  gpu::GpuMat sample_float;
-  gpu::GpuMat sample_normalized;
+  cv::Mat mean_;
+
+  std::vector<cv::gpu::GpuMat> input_channels_gpu;
+  cv::gpu::GpuMat mean_gpu;
+  cv::gpu::GpuMat g_img;
+  cv::gpu::GpuMat sample;
+  cv::gpu::GpuMat sample_resized;
+  cv::gpu::GpuMat sample_float;
+  cv::gpu::GpuMat sample_normalized;
+
 };
 
 Detector::Detector(const string& model_file,
@@ -91,13 +88,13 @@ Detector::Detector(const string& model_file,
   num_channels_ = input_layer->channels();
   CHECK(num_channels_ == 3 || num_channels_ == 1)
     << "Input layer should have 1 or 3 channels.";
-  input_geometry_ = Size(input_layer->width(), input_layer->height());
+  input_geometry_ = cv::Size(input_layer->width(), input_layer->height());
 
   /* Load the binaryproto mean file. */
   SetMean(mean_file, mean_value);
 }
 
-std::vector<vector<float> > Detector::Detect(const Mat& img) {
+std::vector<vector<float> > Detector::Detect(const cv::Mat& img) {
   Blob<float>* input_layer = net_->input_blobs()[0];
   input_layer->Reshape(1, num_channels_,
                        input_geometry_.height, input_geometry_.width);
@@ -112,7 +109,7 @@ std::vector<vector<float> > Detector::Detect(const Mat& img) {
   WrapInputLayer(&input_channels_gpu);
   Preprocess(img, &input_channels_gpu);
 #endif
-  
+
   net_->Forward();
 
   /* Copy the output layer to a std::vector */
@@ -135,7 +132,7 @@ std::vector<vector<float> > Detector::Detect(const Mat& img) {
 
 /* Load the mean file in binaryproto format. */
 void Detector::SetMean(const string& mean_file, const string& mean_value) {
-  Scalar channel_mean;
+  cv::Scalar channel_mean;
   if (!mean_file.empty()) {
     CHECK(mean_value.empty()) <<
       "Cannot specify mean_file and mean_value at the same time";
@@ -149,23 +146,23 @@ void Detector::SetMean(const string& mean_file, const string& mean_value) {
       << "Number of channels of mean file doesn't match input layer.";
 
     /* The format of the mean file is planar 32-bit float BGR or grayscale. */
-    std::vector<Mat> channels;
+    std::vector<cv::Mat> channels;
     float* data = mean_blob.mutable_cpu_data();
     for (int i = 0; i < num_channels_; ++i) {
       /* Extract an individual channel. */
-      Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
+      cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
       channels.push_back(channel);
       data += mean_blob.height() * mean_blob.width();
     }
 
     /* Merge the separate channels into a single image. */
-    Mat mean;
-    merge(channels, mean);
+    cv::Mat mean;
+    cv::merge(channels, mean);
 
     /* Compute the global mean pixel value and create a mean image
      * filled with this value. */
     channel_mean = cv::mean(mean);
-    mean_ = Mat(input_geometry_, mean.type(), channel_mean);
+    mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean);
   }
   if (!mean_value.empty()) {
     CHECK(mean_file.empty()) <<
@@ -180,16 +177,18 @@ void Detector::SetMean(const string& mean_file, const string& mean_value) {
     CHECK(values.size() == 1 || values.size() == num_channels_) <<
       "Specify either 1 mean_value or as many as channels: " << num_channels_;
 
-    std::vector<Mat> channels;
+    std::vector<cv::Mat> channels;
     for (int i = 0; i < num_channels_; ++i) {
       /* Extract an individual channel. */
-      Mat channel(input_geometry_.height, input_geometry_.width, CV_32FC1,
-          Scalar(values[i]));
+      cv::Mat channel(input_geometry_.height, input_geometry_.width, CV_32FC1,
+          cv::Scalar(values[i]));
       channels.push_back(channel);
     }
-    merge(channels, mean_);
+    cv::merge(channels, mean_);
   }
+#if !CPU_ONLY && USE_OPENCV_GPU
   mean_gpu.upload(mean_);
+#endif
 }
 
 /* Wrap the input layer of the network in separate cv::Mat objects
@@ -197,74 +196,74 @@ void Detector::SetMean(const string& mean_file, const string& mean_value) {
  * don't need to rely on cudaMemcpy2D. The last preprocessing
  * operation will write the separate channels directly to the input
  * layer. */
-void Detector::WrapInputLayer(std::vector<Mat>* input_channels) {
+void Detector::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
   Blob<float>* input_layer = net_->input_blobs()[0];
 
   int width = input_layer->width();
   int height = input_layer->height();
   float* input_data = input_layer->mutable_cpu_data();
   for (int i = 0; i < input_layer->channels(); ++i) {
-    Mat channel(height, width, CV_32FC1, input_data);
+    cv::Mat channel(height, width, CV_32FC1, input_data);
     input_channels->push_back(channel);
     input_data += width * height;
   }
 }
 
-void Detector::WrapInputLayer(std::vector<gpu::GpuMat>* input_channels) {
+void Detector::WrapInputLayer(std::vector<cv::gpu::GpuMat>* input_channels) {
   Blob<float>* input_layer = net_->input_blobs()[0];
 
   int width = input_layer->width();
   int height = input_layer->height();
   float* input_data = input_layer->mutable_gpu_data();
   for (int i = 0; i < input_layer->channels(); ++i) {
-    gpu::GpuMat channel(height, width, CV_32FC1, input_data);
+    cv::gpu::GpuMat channel(height, width, CV_32FC1, input_data);
     input_channels->push_back(channel);
     input_data += width * height;
   }
 }
 
-void Detector::Preprocess(const Mat& img,
-                            std::vector<Mat>* input_channels) {
+void Detector::Preprocess(const cv::Mat& img,
+                            std::vector<cv::Mat>* input_channels) {
   /* Convert the input image to the input image format of the network. */
-  Mat sample;
+  cv::Mat sample;
   if (img.channels() == 3 && num_channels_ == 1)
-    cvtColor(img, sample, COLOR_BGR2GRAY);
+    cv::cvtColor(img, sample, cv::COLOR_BGR2GRAY);
   else if (img.channels() == 4 && num_channels_ == 1)
-    cvtColor(img, sample, COLOR_BGRA2GRAY);
+    cv::cvtColor(img, sample, cv::COLOR_BGRA2GRAY);
   else if (img.channels() == 4 && num_channels_ == 3)
-    cvtColor(img, sample, COLOR_BGRA2BGR);
+    cv::cvtColor(img, sample, cv::COLOR_BGRA2BGR);
   else if (img.channels() == 1 && num_channels_ == 3)
-    cvtColor(img, sample, COLOR_GRAY2BGR);
+    cv::cvtColor(img, sample, cv::COLOR_GRAY2BGR);
   else
     sample = img;
 
-  Mat sample_resized;
+  cv::Mat sample_resized;
   if (sample.size() != input_geometry_)
-    resize(sample, sample_resized, input_geometry_);
+    cv::resize(sample, sample_resized, input_geometry_);
   else
     sample_resized = sample;
 
-  Mat sample_float;
+  cv::Mat sample_float;
   if (num_channels_ == 3)
     sample_resized.convertTo(sample_float, CV_32FC3);
   else
     sample_resized.convertTo(sample_float, CV_32FC1);
 
-  Mat sample_normalized;
-  subtract(sample_float, mean_, sample_normalized);
+  cv::Mat sample_normalized;
+  cv::subtract(sample_float, mean_, sample_normalized);
 
   /* This operation will write the separate BGR planes directly to the
-   * input layer of the network because it is wrapped by the Mat
+   * input layer of the network because it is wrapped by the cv::Mat
    * objects in input_channels. */
-  split(sample_normalized, *input_channels);
+  cv::split(sample_normalized, *input_channels);
 
   CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
         == net_->input_blobs()[0]->cpu_data())
     << "Input channels are not wrapping the input layer of the network.";
 }
 
-void Detector::Preprocess(const Mat& img,
-                            std::vector<gpu::GpuMat>* input_channels) {
+void Detector::Preprocess(const cv::Mat& img,
+                            std::vector<cv::gpu::GpuMat>* input_channels) {
   /* Convert the input image to the input image format of the network. */
   if(img.channels() == num_channels_)
 	  sample.upload(img);
@@ -272,17 +271,17 @@ void Detector::Preprocess(const Mat& img,
   { 
 	  g_img.upload(img);
   	  if (g_img.channels() == 3 && num_channels_ == 1)
-		cvtColor(g_img, sample, COLOR_BGR2GRAY);
+		cv::gpu::cvtColor(g_img, sample, cv::COLOR_BGR2GRAY);
 	  else if (g_img.channels() == 4 && num_channels_ == 1)
-		cvtColor(g_img, sample, COLOR_BGRA2GRAY);
+		cv::gpu::cvtColor(g_img, sample, cv::COLOR_BGRA2GRAY);
 	  else if (g_img.channels() == 4 && num_channels_ == 3)
-		cvtColor(g_img, sample, COLOR_BGRA2BGR);
+		cv::gpu::cvtColor(g_img, sample, cv::COLOR_BGRA2BGR);
 	  else if (g_img.channels() == 1 && num_channels_ == 3)
-		cvtColor(g_img, sample, COLOR_GRAY2BGR);
+		cv::gpu::cvtColor(g_img, sample, cv::COLOR_GRAY2BGR);
   }
 
   if (sample.size() != input_geometry_)
-    resize(sample, sample_resized, input_geometry_);
+    cv::gpu::resize(sample, sample_resized, input_geometry_);
   else
     sample_resized = sample;
 
@@ -291,12 +290,12 @@ void Detector::Preprocess(const Mat& img,
   else
     sample_resized.convertTo(sample_float, CV_32FC1);
 
-  subtract(sample_float, mean_gpu, sample_normalized);
-  
+  cv::gpu::subtract(sample_float, mean_gpu, sample_normalized);
+
   /* This operation will write the separate BGR planes directly to the
    * input layer of the network because it is wrapped by the Mat
    * objects in input_channels. */
-  split(sample_normalized, *input_channels);
+  cv::gpu::split(sample_normalized, *input_channels);
 
   CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
         == net_->input_blobs()[0]->gpu_data())
@@ -368,7 +367,7 @@ int main(int argc, char** argv) {
 
   while (infile >> file) {
     if (file_type == "image") {
-      Mat img = imread(file, -1);
+      cv::Mat img = cv::imread(file, -1);
       CHECK(!img.empty()) << "Unable to decode image " << file;
       std::vector<vector<float> > detections = detector.Detect(img);
 
@@ -390,11 +389,11 @@ int main(int argc, char** argv) {
       }
       iFrame++;
     } else if (file_type == "video") {
-      VideoCapture cap(file);
+      cv::VideoCapture cap(file);
       if (!cap.isOpened()) {
         LOG(FATAL) << "Failed to open video: " << file;
       }
-      Mat img;
+      cv::Mat img;
       int frame_count = 0;
       while (true) {
         bool success = cap.read(img);

From e4a6e094543d3d349c09ffe48ec5c29bc15ecf2d Mon Sep 17 00:00:00 2001
From: "sungjun.choi" <sungjun.choi@navercorp.com>
Date: Wed, 7 Sep 2016 14:37:00 +0900
Subject: [PATCH 3/8] Fix ssd_detect compile error: qualify Mat as cv::Mat

---
 examples/ssd/ssd_detect.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/ssd/ssd_detect.cpp b/examples/ssd/ssd_detect.cpp
index 77a63ebe54d..14a8d0760b2 100644
--- a/examples/ssd/ssd_detect.cpp
+++ b/examples/ssd/ssd_detect.cpp
@@ -102,7 +102,7 @@ std::vector<vector<float> > Detector::Detect(const cv::Mat& img) {
   net_->Reshape();
 
 #if CPU_ONLY || !USE_OPENCV_GPU
-  std::vector<Mat> input_channels;
+  std::vector<cv::Mat> input_channels;
   WrapInputLayer(&input_channels);
   Preprocess(img, &input_channels);
 #else

From 63ee2f22007d943b2ef76935777c9e93ec2586e1 Mon Sep 17 00:00:00 2001
From: "sungjun.choi" <sungjun.choi@navercorp.com>
Date: Wed, 7 Sep 2016 15:09:41 +0900
Subject: [PATCH 4/8] Fix ssd_detect Travis errors: guard GPU-only code, clean up style

---
 examples/ssd/ssd_detect.cpp | 66 ++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 31 deletions(-)

diff --git a/examples/ssd/ssd_detect.cpp b/examples/ssd/ssd_detect.cpp
index 14a8d0760b2..469f5b0455f 100644
--- a/examples/ssd/ssd_detect.cpp
+++ b/examples/ssd/ssd_detect.cpp
@@ -51,12 +51,13 @@ class Detector {
                   std::vector<cv::Mat>* input_channels);
   void Preprocess(const cv::Mat& img,
                   std::vector<cv::gpu::GpuMat>* input_channels);
+
  private:
   shared_ptr<Net<float> > net_;
   cv::Size input_geometry_;
   int num_channels_;
   cv::Mat mean_;
-
+#if !CPU_ONLY && USE_OPENCV_GPU
   std::vector<cv::gpu::GpuMat> input_channels_gpu;
   cv::gpu::GpuMat mean_gpu;
   cv::gpu::GpuMat g_img;
@@ -64,7 +65,7 @@ class Detector {
   cv::gpu::GpuMat sample_resized;
   cv::gpu::GpuMat sample_float;
   cv::gpu::GpuMat sample_normalized;
-
+#endif
 };
 
 Detector::Detector(const string& model_file,
@@ -209,19 +210,6 @@ void Detector::WrapInputLayer(std::vector<cv::Mat>* input_channels) {
   }
 }
 
-void Detector::WrapInputLayer(std::vector<cv::gpu::GpuMat>* input_channels) {
-  Blob<float>* input_layer = net_->input_blobs()[0];
-
-  int width = input_layer->width();
-  int height = input_layer->height();
-  float* input_data = input_layer->mutable_gpu_data();
-  for (int i = 0; i < input_layer->channels(); ++i) {
-    cv::gpu::GpuMat channel(height, width, CV_32FC1, input_data);
-    input_channels->push_back(channel);
-    input_data += width * height;
-  }
-}
-
 void Detector::Preprocess(const cv::Mat& img,
                             std::vector<cv::Mat>* input_channels) {
   /* Convert the input image to the input image format of the network. */
@@ -262,22 +250,36 @@ void Detector::Preprocess(const cv::Mat& img,
     << "Input channels are not wrapping the input layer of the network.";
 }
 
+#if !CPU_ONLY && USE_OPENCV_GPU
+void Detector::WrapInputLayer(std::vector<cv::gpu::GpuMat>* input_channels) {
+  Blob<float>* input_layer = net_->input_blobs()[0];
+
+  int width = input_layer->width();
+  int height = input_layer->height();
+  float* input_data = input_layer->mutable_gpu_data();
+  for (int i = 0; i < input_layer->channels(); ++i) {
+    cv::gpu::GpuMat channel(height, width, CV_32FC1, input_data);
+    input_channels->push_back(channel);
+    input_data += width * height;
+  }
+}
+
 void Detector::Preprocess(const cv::Mat& img,
-                            std::vector<cv::gpu::GpuMat>* input_channels) {
+  std::vector<cv::gpu::GpuMat>* input_channels) {
   /* Convert the input image to the input image format of the network. */
   if(img.channels() == num_channels_)
-	  sample.upload(img);
+    sample.upload(img);
   else
   { 
-	  g_img.upload(img);
-  	  if (g_img.channels() == 3 && num_channels_ == 1)
-		cv::gpu::cvtColor(g_img, sample, cv::COLOR_BGR2GRAY);
-	  else if (g_img.channels() == 4 && num_channels_ == 1)
-		cv::gpu::cvtColor(g_img, sample, cv::COLOR_BGRA2GRAY);
-	  else if (g_img.channels() == 4 && num_channels_ == 3)
-		cv::gpu::cvtColor(g_img, sample, cv::COLOR_BGRA2BGR);
-	  else if (g_img.channels() == 1 && num_channels_ == 3)
-		cv::gpu::cvtColor(g_img, sample, cv::COLOR_GRAY2BGR);
+    g_img.upload(img);
+    if (g_img.channels() == 3 && num_channels_ == 1)
+      cv::gpu::cvtColor(g_img, sample, cv::COLOR_BGR2GRAY);
+    else if (g_img.channels() == 4 && num_channels_ == 1)
+      cv::gpu::cvtColor(g_img, sample, cv::COLOR_BGRA2GRAY);
+    else if (g_img.channels() == 4 && num_channels_ == 3)
+      cv::gpu::cvtColor(g_img, sample, cv::COLOR_BGRA2BGR);
+    else if (g_img.channels() == 1 && num_channels_ == 3)
+      cv::gpu::cvtColor(g_img, sample, cv::COLOR_GRAY2BGR);
   }
 
   if (sample.size() != input_geometry_)
@@ -301,6 +303,7 @@ void Detector::Preprocess(const cv::Mat& img,
         == net_->input_blobs()[0]->gpu_data())
     << "Input channels are not wrapping the input layer of the network.";
 }
+#endif
 
 DEFINE_string(mean_file, "",
     "The mean file used to subtract from the input image.");
@@ -359,10 +362,10 @@ int main(int argc, char** argv) {
   // Process image one by one.
   std::ifstream infile(argv[3]);
   std::string file;
-  //Time Check
+  // Time Check
   struct timeval start_point, end_point;
   double operating_time;
-  gettimeofday(&start_point, NULL);  
+  gettimeofday(&start_point, NULL);
   int iFrame = 0;
 
   while (infile >> file) {
@@ -430,9 +433,10 @@ int main(int argc, char** argv) {
       LOG(FATAL) << "Unknown file_type: " << file_type;
     }
   }
-  gettimeofday(&end_point, NULL); 
-  operating_time = (double)(end_point.tv_sec)+(double)(end_point.tv_usec)/1000000.0-(double)(start_point.tv_sec)-(double)(start_point.tv_usec)/1000000.0;
-  printf("%d Frame, %f, %f fps\n", iFrame, operating_time, (float)iFrame / operating_time );
+  gettimeofday(&end_point, NULL);
+  operating_time = static_cast<double>(end_point.tv_sec) + static_cast<double>(end_point.tv_usec) / 1000000.0
+                 - static_cast<double>(start_point.tv_sec) - static_cast<double>(start_point.tv_usec) / 1000000.0;
+  printf("%d Frame, %f, %f fps\n", iFrame, operating_time, static_cast<float>(iFrame) / operating_time);
   return 0;
 }
 #else

From 2693bd0d470765142d7599944efb488c11ab4fc6 Mon Sep 17 00:00:00 2001
From: "sungjun.choi" <sungjun.choi@navercorp.com>
Date: Wed, 7 Sep 2016 15:11:30 +0900
Subject: [PATCH 5/8] Link opencv_gpu only when CPU_ONLY is not set

---
 Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index e2ce9405425..7798d3c7fb5 100644
--- a/Makefile
+++ b/Makefile
@@ -192,8 +192,11 @@ ifeq ($(USE_LMDB), 1)
 	LIBRARIES += lmdb
 endif
 ifeq ($(USE_OPENCV), 1)
-	LIBRARIES += opencv_core opencv_highgui opencv_imgproc opencv_gpu
+	LIBRARIES += opencv_core opencv_highgui opencv_imgproc
 
+ifneq ($(CPU_ONLY), 1)
+    LIBRARIES += opencv_gpu
+endif
 	ifeq ($(OPENCV_VERSION), 3)
 		LIBRARIES += opencv_imgcodecs opencv_videoio
 	endif

From ee423671b5df3a8d120ad8725b852c657bb3e267 Mon Sep 17 00:00:00 2001
From: "sungjun.choi" <sungjun.choi@navercorp.com>
Date: Wed, 7 Sep 2016 15:54:54 +0900
Subject: [PATCH 6/8] Fix ssd_detect style errors reported by Travis

---
 examples/ssd/ssd_detect.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/examples/ssd/ssd_detect.cpp b/examples/ssd/ssd_detect.cpp
index 469f5b0455f..4fbfc6d922a 100644
--- a/examples/ssd/ssd_detect.cpp
+++ b/examples/ssd/ssd_detect.cpp
@@ -267,10 +267,9 @@ void Detector::WrapInputLayer(std::vector<cv::gpu::GpuMat>* input_channels) {
 void Detector::Preprocess(const cv::Mat& img,
   std::vector<cv::gpu::GpuMat>* input_channels) {
   /* Convert the input image to the input image format of the network. */
-  if(img.channels() == num_channels_)
+  if (img.channels() == num_channels_)
     sample.upload(img);
-  else
-  { 
+  else {
     g_img.upload(img);
     if (g_img.channels() == 3 && num_channels_ == 1)
       cv::gpu::cvtColor(g_img, sample, cv::COLOR_BGR2GRAY);
@@ -434,8 +433,10 @@ int main(int argc, char** argv) {
     }
   }
   gettimeofday(&end_point, NULL);
-  operating_time = static_cast<double>(end_point.tv_sec) + static_cast<double>(end_point.tv_usec) / 1000000.0
-                 - static_cast<double>(start_point.tv_sec) - static_cast<double>(start_point.tv_usec) / 1000000.0;
+  operating_time = static_cast<double>(end_point.tv_sec)
+	             + static_cast<double>(end_point.tv_usec) / 1000000.0
+                 - static_cast<double>(start_point.tv_sec)
+				 - static_cast<double>(start_point.tv_usec) / 1000000.0;
   printf("%d Frame, %f, %f fps\n", iFrame, operating_time, static_cast<float>(iFrame) / operating_time);
   return 0;
 }

From 54e62e3a1c4b16146ad65bbb1e2284178254937c Mon Sep 17 00:00:00 2001
From: "sungjun.choi" <sungjun.choi@navercorp.com>
Date: Wed, 7 Sep 2016 16:17:50 +0900
Subject: [PATCH 7/8] fix ssd_detect Travis errors: brace if-branch, split time sum

---
 examples/ssd/ssd_detect.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/examples/ssd/ssd_detect.cpp b/examples/ssd/ssd_detect.cpp
index 4fbfc6d922a..53fc7cbb4ef 100644
--- a/examples/ssd/ssd_detect.cpp
+++ b/examples/ssd/ssd_detect.cpp
@@ -267,8 +267,9 @@ void Detector::WrapInputLayer(std::vector<cv::gpu::GpuMat>* input_channels) {
 void Detector::Preprocess(const cv::Mat& img,
   std::vector<cv::gpu::GpuMat>* input_channels) {
   /* Convert the input image to the input image format of the network. */
-  if (img.channels() == num_channels_)
+  if (img.channels() == num_channels_) {
     sample.upload(img);
+  }
   else {
     g_img.upload(img);
     if (g_img.channels() == 3 && num_channels_ == 1)
@@ -433,10 +434,10 @@ int main(int argc, char** argv) {
     }
   }
   gettimeofday(&end_point, NULL);
-  operating_time = static_cast<double>(end_point.tv_sec)
-	             + static_cast<double>(end_point.tv_usec) / 1000000.0
-                 - static_cast<double>(start_point.tv_sec)
-				 - static_cast<double>(start_point.tv_usec) / 1000000.0;
+  operating_time = static_cast<double>(end_point.tv_sec);
+  operating_time += static_cast<double>(end_point.tv_usec) / 1000000.0;
+  operating_time -= static_cast<double>(start_point.tv_sec);
+  operating_time -= static_cast<double>(start_point.tv_usec) / 1000000.0;
   printf("%d Frame, %f, %f fps\n", iFrame, operating_time, static_cast<float>(iFrame) / operating_time);
   return 0;
 }

From c2ce08224d1767b1a364e8ae9c4c6824d8665e03 Mon Sep 17 00:00:00 2001
From: "sungjun.choi" <sungjun.choi@navercorp.com>
Date: Wed, 7 Sep 2016 16:43:21 +0900
Subject: [PATCH 8/8] fix ssd_detect Travis errors: join "} else {", split printf

---
 examples/ssd/ssd_detect.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/ssd/ssd_detect.cpp b/examples/ssd/ssd_detect.cpp
index 53fc7cbb4ef..1a922b0c4e9 100644
--- a/examples/ssd/ssd_detect.cpp
+++ b/examples/ssd/ssd_detect.cpp
@@ -269,8 +269,7 @@ void Detector::Preprocess(const cv::Mat& img,
   /* Convert the input image to the input image format of the network. */
   if (img.channels() == num_channels_) {
     sample.upload(img);
-  }
-  else {
+  } else {
     g_img.upload(img);
     if (g_img.channels() == 3 && num_channels_ == 1)
       cv::gpu::cvtColor(g_img, sample, cv::COLOR_BGR2GRAY);
@@ -438,7 +437,9 @@ int main(int argc, char** argv) {
   operating_time += static_cast<double>(end_point.tv_usec) / 1000000.0;
   operating_time -= static_cast<double>(start_point.tv_sec);
   operating_time -= static_cast<double>(start_point.tv_usec) / 1000000.0;
-  printf("%d Frame, %f, %f fps\n", iFrame, operating_time, static_cast<float>(iFrame) / operating_time);
+  printf("%d Frame", iFrame);
+  printf(", %f", operating_time);
+  printf(", %f fps\n", static_cast<float>(iFrame) / operating_time);
   return 0;
 }
 #else