Skip to content
This repository was archived by the owner on Aug 11, 2020. It is now read-only.

Commit 22e23f1

Browse files
cjolivier01 authored and tqchen committed on Apr 4, 2017
Fix link error for newer gcc versions, Minor CMake adjustments (#226)
* Fix warnings, build adjustments in CMake
* Fix CUDA variable-declaration order problems
* Remove GPU flavors below 30, no longer compatible
* Fix CUDA variable-declaration order problems
* Add nvcc --run CUDA library path flag for when detecting GPU flavor
* Allow CUDA_LIBRARY_PATH to not be defined
* Minor visibility adjustments
* Remove more warnings
* Use static_cast instead of ()
* Fix warnings
* Fix link error, undefined kSize
* Size() for kPlain packet
* fix: Ignored test directory
* Fix lint, re-apply kSize fix
1 parent 23210f3 commit 22e23f1

14 files changed

+49
-36
lines changed
 

‎.gitignore

-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
*~
1515
doc/html
1616
doc/latex
17-
test*
1817
rabit
1918
dmlc-core
2019
*.db

‎cmake/Cuda.cmake

+6-4
Original file line numberDiff line numberDiff line change
@@ -31,21 +31,23 @@ set(CUDA_gpu_detect_output "")
3131
"}\n")
3232
if(MSVC)
3333
#find vcvarsall.bat and run it building msvc environment
34-
get_filename_component(MY_COMPILER_DIR ${CMAKE_CXX_COMPILER} DIRECTORY)
35-
find_file(MY_VCVARSALL_BAT vcvarsall.bat "${MY_COMPILER_DIR}/.." "${MY_COMPILER_DIR}/../..")
34+
get_filename_component(MY_COMPILER_DIR ${CMAKE_CXX_COMPILER} DIRECTORY)
35+
find_file(MY_VCVARSALL_BAT vcvarsall.bat "${MY_COMPILER_DIR}/.." "${MY_COMPILER_DIR}/../..")
3636
execute_process(COMMAND ${MY_VCVARSALL_BAT} && ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile}
3737
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
3838
RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out
3939
ERROR_QUIET
4040
OUTPUT_STRIP_TRAILING_WHITESPACE)
4141
else()
42-
execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile}
42+
if(CUDA_LIBRARY_PATH)
43+
set(CUDA_LINK_LIBRARY_PATH "-L${CUDA_LIBRARY_PATH}")
44+
endif()
45+
execute_process(COMMAND ${CUDA_NVCC_EXECUTABLE} -arch sm_30 --run ${__cufile} ${CUDA_LINK_LIBRARY_PATH}
4346
WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
4447
RESULT_VARIABLE __nvcc_res OUTPUT_VARIABLE __nvcc_out
4548
ERROR_QUIET
4649
OUTPUT_STRIP_TRAILING_WHITESPACE)
4750
endif()
48-
4951
if(__nvcc_res EQUAL 0)
5052
# nvcc outputs text containing line breaks when building with MSVC.
5153
# The line below prevents CMake from inserting a variable with line

‎cmake/mshadow.cmake

+9
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,15 @@ set(mshadow_LINKER_LIBS "")
33
set(BLAS "Open" CACHE STRING "Selected BLAS library")
44
set_property(CACHE BLAS PROPERTY STRINGS "Atlas;Open;MKL")
55

6+
if(USE_MKL_IF_AVAILABLE)
7+
if(NOT MKL_FOUND)
8+
find_package(MKL)
9+
endif()
10+
if(MKL_FOUND)
11+
set(BLAS "MKL")
12+
endif()
13+
endif()
14+
615
if(BLAS STREQUAL "Atlas" OR BLAS STREQUAL "atlas")
716
find_package(Atlas REQUIRED)
817
include_directories(SYSTEM ${Atlas_INCLUDE_DIR})

‎mshadow/expression.h

+12-12
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,14 @@ namespace expr {
2020
namespace type {
2121
// type expression type are defined as bitmask
2222
// subtype relationshop kRValue < kMapper < kPull < kComplex
23-
/*!
23+
/*!
2424
* \brief this expression directly correspnds to a data class,
25-
* can be used to assign data
25+
* can be used to assign data
2626
*/
2727
const int kRValue = 0;
28-
/*!
28+
/*!
2929
* \brief expression contains element-wise tensor operations,
30-
* map a expression to same shape
30+
* map a expression to same shape
3131
*/
3232
const int kMapper = 1;
3333
/*!
@@ -70,7 +70,7 @@ struct Exp {
7070
}
7171
};
7272
/*!
73-
* \brief scalar expression
73+
* \brief scalar expression
7474
* \tparam DType the data type of the scalar
7575
*/
7676
template<typename DType>
@@ -280,12 +280,12 @@ MakeExp(const Exp<TA, DType, ta> &item1, const Exp<TB, DType, tb> &item2,
280280
(ta|tb|tc|type::kMapper)>(item1.self(), item2.self(), item3.self());
281281
}
282282
/*!
283-
* \brief short hand for MakeExp, usage F<op>(item1,item2,item3). create a ternary operation expression
283+
* \brief short hand for MakeExp, usage F<op>(item1,item2,item3). create a ternary operation expression
284284
* \param item1 first operand
285285
* \param item2 second operand
286286
* \param item3 third operand
287287
* \return the result expression
288-
* \tparam ternary operator
288+
* \tparam ternary operator
289289
* \tparam TA item1 expression
290290
* \tparam ta item1 expression type
291291
* \tparam TB item2 expression
@@ -332,11 +332,11 @@ MakeExp(const Exp<TA, DType, ta> &lhs, const Exp<TB, DType, tb> &rhs) {
332332
(ta|tb|type::kMapper)>(lhs.self(), rhs.self());
333333
}
334334
/*!
335-
* \brief short hand for MakeExp, usage F<op>(lhs, rhs). create a binary operation expression
335+
* \brief short hand for MakeExp, usage F<op>(lhs, rhs). create a binary operation expression
336336
* \param lhs left operand
337337
* \param rhs right operand
338338
* \return the result expression
339-
* \tparam binary operator
339+
* \tparam binary operator
340340
* \tparam TA lhs expression
341341
* \tparam ta lhs expression type
342342
* \tparam TB rhs expression
@@ -397,11 +397,11 @@ inline UnaryMapExp<OP, TA, DType, (ta|type::kMapper)>
397397
MakeExp(const Exp<TA, DType, ta> &src) {
398398
return UnaryMapExp<OP, TA, DType, (ta|type::kMapper)>(src.self());
399399
}
400-
/*!
401-
* \brief short hand for MakeExp, usage F<op>(src), create a unary operation expression
400+
/*!
401+
* \brief short hand for MakeExp, usage F<op>(src), create a unary operation expression
402402
* \param src source expression
403403
* \return the result expression
404-
* \tparam operator
404+
* \tparam operator
405405
* \tparam TA source expression
406406
* \tparam ta source expression type
407407
* \sa mshadow::op

‎mshadow/extension/broadcast_with_axis.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ struct BroadcastWithAxisExp:
5555
} else {
5656
CHECK(dimdst > axis && axis >= 0) << "broadcast axis (keepdim) out of bound, " <<
5757
"axis must be between 0 and" << dimdst - 1 << ", given=" << axis << ".";
58-
CHECK_EQ(src_shape[axis], 1) << "Size of the dimension of the broadcasting axis must be 1" <<
58+
CHECK_EQ(src_shape[axis], 1U) << "Size of the dimension of the broadcasting axis must be 1" <<
5959
" when keepdim is on, src_shape[" << axis << "]=" << src_shape[axis] << ".";
6060
for (int i = 0; i <= axis - 1; ++i) {
6161
this->shape_[i] = src_shape[i];
@@ -137,7 +137,7 @@ struct BroadcastWithMultiAxesExp :
137137
CHECK(dimsrc > axes[i]) << "broadcast axis (keepdim) out of bound, " <<
138138
"all axes must be between 0 and" << dimsrc - 1 << ", given axes[" << i << "] = " << axes[i]
139139
<< ".";
140-
CHECK_EQ(src_shape[axes[i]], 1) << "Size of the dimension of the broadcasting axis must be 1"
140+
CHECK_EQ(src_shape[axes[i]], 1U) << "Size of the dimension of the broadcasting axis must be 1"
141141
<< ", src_shape[" << axes[i] << "]=" << src_shape[axes[i]] << ".";
142142
if (i < this->axesnum_ - 1) {
143143
CHECK(axes[i] < axes[i + 1]) << "The given axes must be in increasing order.";

‎mshadow/extension/channel_pool.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ inline ChannelPoolingExp<Reducer, SrcExp, DType, ExpInfo<SrcExp>::kDim>
5757
chpool(const Exp<SrcExp, DType, etype> &src, index_t nsize) {
5858
TypeCheckPass<ExpInfo<SrcExp>::kDim >= 3>
5959
::Error_Expression_Does_Not_Meet_Dimension_Req();
60-
CHECK_EQ(nsize % 2, 1) << "chpool: if no pad is specified, local size must be odd";
60+
CHECK_EQ(nsize % 2, 1U) << "chpool: if no pad is specified, local size must be odd";
6161
return ChannelPoolingExp<Reducer, SrcExp,
6262
DType, ExpInfo<SrcExp>::kDim>(src.self(), nsize, 1, nsize / 2);
6363
}

‎mshadow/extension/implicit_gemm.h

+5-4
Original file line numberDiff line numberDiff line change
@@ -67,14 +67,15 @@ struct Plan<ImplicitGEMMExp<LhsExp, RhsExp, DType>, DType> {
6767
typedef packet::Packet<DType> Packet;
6868
Packet sum = Packet::Fill(0);
6969

70-
DType lhs_temp[Packet::kSize], rhs_temp[Packet::kSize];
70+
const size_t packetSize = Packet::Size();
71+
DType lhs_temp[packetSize], rhs_temp[packetSize];
7172

72-
for (index_t i = 0; i < prod_size_lower_align_; i += packet::Packet<DType>::kSize) {
73+
for (index_t i = 0; i < prod_size_lower_align_; i += packetSize) {
7374
// unroll
74-
for (index_t j = 0; j < Packet::kSize; ++j) {
75+
for (index_t j = 0; j < packetSize; ++j) {
7576
lhs_temp[j] = lhs_.Eval(y, i + j);
7677
}
77-
for (index_t j = 0; j < Packet::kSize; ++j) {
78+
for (index_t j = 0; j < packetSize; ++j) {
7879
rhs_temp[j] = rhs_.Eval(i + j, x);
7980
}
8081
sum = sum + Packet::LoadUnAligned(lhs_temp) * Packet::LoadUnAligned(rhs_temp);

‎mshadow/extension/transpose.h

+4-4
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,9 @@ namespace expr {
1616
* output: Tensor<Device,dimdst> oshape[a1],oshape[a2] = ishape[a2],oshape[a1]
1717
*
1818
* \tparam SrcExp type of source expression
19-
* \tparam DType the type of elements
19+
* \tparam DType the type of elements
2020
* \tparam dimsrc source dimension, assert a1 > a2
21-
* \tparam m_a1 one dimension to be swapped, encoded by dimsrc - a1
21+
* \tparam m_a1 one dimension to be swapped, encoded by dimsrc - a1
2222
* \tparam a2 second dimension to be swapped, encoded by a2
2323
*/
2424
template<typename SrcExp, typename DType, int dimsrc>
@@ -50,7 +50,7 @@ struct TransposeExExp:
5050
* \tparam a1 higher dimension to be swapped, assert a1 > a2
5151
* \tparam a2 lower dimension to be swapped
5252
* \tparam SrcExp source expression
53-
* \tparam DType the type of elements
53+
* \tparam DType the type of elements
5454
* \tparam etype source expression type
5555
*/
5656
template<typename SrcExp, typename DType, int etype>
@@ -110,7 +110,7 @@ struct TransposeIndicesExp:
110110
Shape<dimsrc> dst_stride_;
111111
bool axes_checking_flag[dimsrc] = { 0 };
112112
for (int i = 0; i < dimsrc; ++i) {
113-
CHECK_LT(axes[i], dimsrc)
113+
CHECK_LT(static_cast<int>(axes[i]), dimsrc)
114114
<< "Invalid axes input! All elements of axes must be between 0 and " << dimsrc
115115
<< ", find axes=" << axes;
116116
dst_shape_[i] = src_shape[axes[i]];

‎mshadow/packet-inl.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ class PacketPlan {
214214
public:
215215
/*!
216216
* \brief evaluate the expression at index [y][x],
217-
* x will be aligned to Packet<DType, Arch>::kSize
217+
* x will be aligned to Packet<DType, Arch>::Size()
218218
*/
219219
MSHADOW_CINLINE packet::Packet<DType, Arch> EvalPacket(index_t y, index_t x) const;
220220
MSHADOW_CINLINE DType Eval(index_t y, index_t x) const;
@@ -395,11 +395,12 @@ inline void MapPacketPlan(Tensor<cpu, dim, DType> _dst,
395395
const expr::PacketPlan<E, DType, Arch>& plan) {
396396
Tensor<cpu, 2, DType> dst = _dst.FlatTo2D();
397397
const index_t xlen = packet::LowerAlign<DType, Arch>(dst.size(1));
398+
const size_t packetSize = packet::Packet<DType, Arch>::Size();
398399
#if (MSHADOW_USE_CUDA == 0)
399400
#pragma omp parallel for
400401
#endif
401402
for (openmp_index_t y = 0; y < dst.size(0); ++y) {
402-
for (index_t x = 0; x < xlen; x += packet::Packet<DType, Arch>::kSize) {
403+
for (index_t x = 0; x < xlen; x += packetSize) {
403404
packet::Saver<SV, DType, Arch>::Save(&dst[y][x], plan.EvalPacket(y, x));
404405
}
405406
for (index_t x = xlen; x < dst.size(1); ++x) {

‎mshadow/packet/plain-inl.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ template<typename DType>
1515
struct Packet<DType, kPlain> {
1616
public:
1717
/*! \brief number of float in vector */
18-
static const index_t kSize = 1;
18+
static inline index_t Size() { return 1; }
1919
/*! \brief The internal data */
2020
DType data_;
2121
// enable default copy constructor

‎mshadow/packet/sse-inl.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ template<>
1717
struct Packet<float, kSSE2> {
1818
public:
1919
/*! \brief number of float in vector */
20-
static const index_t kSize = 4;
20+
static inline index_t Size() { return 4; }
2121
/*! \brief The internal data */
2222
__m128 data_;
2323
// enable default copy constructor
@@ -63,7 +63,7 @@ struct Packet<float, kSSE2> {
6363
template<>
6464
struct Packet<double, kSSE2> {
6565
/*! \brief number of float in vector */
66-
static const index_t kSize = 2;
66+
static inline index_t Size() { return 2; }
6767
// internal data
6868
__m128d data_;
6969
// constructor

‎mshadow/tensor_blob.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,8 @@ struct TShape {
259259
*/
260260
template<int dim>
261261
inline Shape<dim> get(void) const {
262-
CHECK_EQ(dim, ndim_) << "dimension do not match target dimension " << dim << " vs " << ndim_;
262+
CHECK_EQ(dim, static_cast<int>(ndim_)) << "dimension do not match target dimension "
263+
<< dim << " vs " << ndim_;
263264
const index_t *d = this->data();
264265
Shape<dim> s;
265266
for (int i = 0; i < dim; ++i) {

‎mshadow/tensor_cpu-inl.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ inline void MapReduceKeepLowest(TRValue<R, cpu, 1, DType> *dst,
210210
::Check(exp.self()).FlatTo2D();
211211
Shape<1> dshape = expr::ShapeCheck<1, R>::Check(dst->self());
212212
CHECK_EQ(eshape[1], dshape[0]) << "MapReduceKeepLowest::reduction dimension do not match";
213-
CHECK_NE(eshape[0], 0) << "can not reduce over empty tensor";
213+
CHECK_NE(eshape[0], 0U) << "can not reduce over empty tensor";
214214
// execution
215215
expr::Plan<R, DType> dplan = MakePlan(dst->self());
216216
expr::Plan<E, DType> splan = MakePlan(exp.self());

‎mshadow/tensor_gpu-inl.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ inline void MapReduceKeepLowest(TRValue<R, gpu, 1, DType> *dst,
126126
::Check(exp.self()).FlatTo2D();
127127
Shape<1> dshape = expr::ShapeCheck<1, R>::Check(dst->self());
128128
CHECK_EQ(eshape[1], dshape[0]) << "MapReduceKeepLowest::reduction dimension do not match";
129-
CHECK_NE(eshape[0], 0) << "can not reduce over empty tensor";
129+
CHECK_NE(eshape[0], 0U) << "can not reduce over empty tensor";
130130
cuda::MapReduceKeepLowest<Saver, Reducer>
131131
(MakePlan(dst->self()), MakePlan(exp.self()), scale, eshape,
132132
Stream<gpu>::GetStream(expr::StreamInfo<gpu, R>::Get(dst->self())));

0 commit comments

Comments
 (0)
This repository has been archived.