refactor(ttm): add namespace ttm

bassoy committed Nov 1, 2024
1 parent c7c4cd4 commit fb3c0fa

Showing 20 changed files with 600 additions and 637 deletions.
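In short, this refactor moves the library's public API from namespace tlib into tlib::ttm. A rough before/after sketch of a call site (hypothetical variable names, inferred from the diffs below):

    // before this commit: everything lived directly in tlib
    auto C = tlib::ttm(q, A, B,
                       tlib::parallel_policy::parallel_blas,
                       tlib::slicing_policy::slice,
                       tlib::fusion_policy::none);

    // after this commit: the API sits in tlib::ttm,
    // which callers can pull in wholesale
    using namespace tlib::ttm;
    auto C = ttm(q, A, B,
                 parallel_policy::parallel_blas,
                 slicing_policy::slice,
                 fusion_policy::none);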
7 changes: 4 additions & 3 deletions example/interface1.cpp
@@ -4,11 +4,12 @@
 #include <numeric>
 #include <iostream>
 
+using namespace tlib::ttm;
 
 int main()
 {
     using value_t = float;
-    using tensor_t = tlib::tensor<value_t>; // or std::array<value_t,N>
+    using tensor_t = tensor<value_t>; // or std::array<value_t,N>
     using shape_t = typename tensor_t::shape_t;
 
     // shape tuple for A
@@ -25,8 +26,8 @@ int main()
     auto pb = nb.size();
 
     // layout tuple for A and C
-    auto pia = tlib::detail::generate_k_order_layout(pa,1ul);
-    auto pib = tlib::detail::generate_k_order_layout(pb,1ul);
+    auto pia = detail::generate_k_order_layout(pa,1ul);
+    auto pib = detail::generate_k_order_layout(pb,1ul);
 
     auto A = tensor_t( na, pia );
     auto B = tensor_t( nb, pib );
15 changes: 8 additions & 7 deletions example/interface2.cpp
@@ -4,11 +4,12 @@
 #include <numeric>
 #include <iostream>
 
+using namespace tlib::ttm;
 
 int main()
 {
     using value_t = float;
-    using tensor_t = tlib::tensor<value_t>; // or std::array<value_t,N>
+    using tensor_t = tensor<value_t>; // or std::array<value_t,N>
     using shape_t = typename tensor_t::shape_t;
 
     // shape tuple for A
@@ -25,8 +26,8 @@ int main()
     auto pb = nb.size();
 
     // layout tuple for A and C
-    auto pia = tlib::detail::generate_k_order_layout(pa,1ul);
-    auto pib = tlib::detail::generate_k_order_layout(pb,1ul);
+    auto pia = detail::generate_k_order_layout(pa,1ul);
+    auto pib = detail::generate_k_order_layout(pb,1ul);
 
     auto A = tensor_t( na, pia );
     auto B = tensor_t( nb, pib );
@@ -55,10 +56,10 @@ int main()
 
 
     // correct shape, layout and strides of the output tensors C1,C2 are automatically computed and returned by the functions.
-    auto C1 = tlib::ttm(q, A,B, tlib::parallel_policy::parallel_blas , tlib::slicing_policy::slice, tlib::fusion_policy::none );
-    auto C2 = tlib::ttm(q, A,B, tlib::parallel_policy::parallel_loop , tlib::slicing_policy::slice, tlib::fusion_policy::all );
-    auto C3 = tlib::ttm(q, A,B, tlib::parallel_policy::parallel_loop , tlib::slicing_policy::subtensor, tlib::fusion_policy::all );
-    auto C4 = tlib::ttm(q, A,B, tlib::parallel_policy::batched_gemm , tlib::slicing_policy::subtensor, tlib::fusion_policy::all );
+    auto C1 = ttm(q, A,B, parallel_policy::parallel_blas , slicing_policy::slice, fusion_policy::none );
+    auto C2 = ttm(q, A,B, parallel_policy::parallel_loop , slicing_policy::slice, fusion_policy::all );
+    auto C3 = ttm(q, A,B, parallel_policy::parallel_loop , slicing_policy::subtensor, fusion_policy::all );
+    auto C4 = ttm(q, A,B, parallel_policy::batched_gemm , slicing_policy::subtensor, fusion_policy::all );
 
 
     std::cout << "C1 = " << C1 << std::endl;
19 changes: 10 additions & 9 deletions example/interface3.cpp
@@ -4,23 +4,24 @@
 #include <numeric>
 #include <iostream>
 
+using namespace tlib::ttm;
 
 int main()
 {
     using value_t = float;
     using size_t = std::size_t;
     using tensor_t = std::vector<value_t>; // or std::array<value_t,N>
-    using shape_t = std::vector<size_t>;
+    using shape_t = std::vector<size_t>;
     using iterator_t = std::ostream_iterator<value_t>;
 
     auto na = shape_t{4,3,2}; // input shape tuple
     auto p = na.size();       // order of input tensor, i.e. number of dimensions - here 3
     auto k = 1ul;             // k-order of input tensor
     auto q = 2ul;
 
-    auto pia = tlib::detail::generate_k_order_layout(p,k); // layout tuple of input tensor - here {1,2,3};
-    auto wa = tlib::detail::generate_strides(na,pia);      // stride tuple of input tensor - here {1,4,12};
-    auto nna = std::accumulate(na.begin(),na.end(),1ul,std::multiplies<>()); // number of elements of input tensor
+    auto pia = detail::generate_k_order_layout(p,k); // layout tuple of input tensor - here {1,2,3};
+    auto wa = detail::generate_strides(na,pia);      // stride tuple of input tensor - here {1,4,12};
+    auto nna = std::accumulate(na.begin(),na.end(),1ul,std::multiplies<>()); // number of elements of input tensor
 
     auto pib = shape_t{1,2};
     auto nb = shape_t{na[q-1]+1,na[q-1]};
@@ -29,7 +30,7 @@ int main()
     auto nc = na;
     nc[q-1] = nb[0];
     auto pic = pia;
-    auto wc = tlib::detail::generate_strides(nc,pic);
+    auto wc = detail::generate_strides(nc,pic);
     auto nnc = std::accumulate(nc.begin(),nc.end(),1ul,std::multiplies<>()); // number of elements of input tensor
 
 
@@ -43,15 +44,15 @@ int main()
     std::cout << "A = [ "; std::copy(A.begin(), A.end(), iterator_t(std::cout, " ")); std::cout << " ];" << std::endl;
     std::cout << "B = [ "; std::copy(B.begin(), B.end(), iterator_t(std::cout, " ")); std::cout << " ];" << std::endl;
 
-    tlib::ttm(
-        tlib::parallel_policy::parallel_blas , tlib::slicing_policy::slice, tlib::fusion_policy::none,
+    ttm(
+        parallel_policy::parallel_blas , slicing_policy::slice, fusion_policy::none,
         q, p,
         A.data(), na.data(), wa.data(), pia.data(),
         B.data(), nb.data(), pib.data(),
         C1.data(), nc.data(), wc.data());
 
-    tlib::ttm(
-        tlib::parallel_policy::parallel_loop, tlib::slicing_policy::subtensor, tlib::fusion_policy::all,
+    ttm(
+        parallel_policy::parallel_loop, slicing_policy::subtensor, fusion_policy::all,
         q, p,
         A.data(), na.data(), wa.data(), pia.data(),
         B.data(), nb.data(), pib.data(),
33 changes: 17 additions & 16 deletions example/measure.cpp
@@ -6,6 +6,8 @@
 #include <string>
 #include <chrono> // for high precision timing
 
+using namespace tlib::ttm;
+
 static const auto gdims = std::string("abcdefghij");
 
 inline
@@ -72,9 +74,9 @@ get_gflops(double nn, double cdimc, double cdima)
 
 template<class value, class parallel_policy, class slicing_policy, class fusion_policy>
 inline void measure(unsigned q,
-                    tlib::tensor<value> const& A,
-                    tlib::tensor<value> const& B,
-                    tlib::tensor<value>& C,
+                    tensor<value> const& A,
+                    tensor<value> const& B,
+                    tensor<value>& C,
                     parallel_policy pp,
                     slicing_policy sp,
                     fusion_policy fp)
@@ -87,8 +89,7 @@ inline void measure(unsigned q,
     for(auto i = 0u; i < iters; ++i){
         std::fill(cache.begin(), cache.end(),char{});
         auto start = std::chrono::high_resolution_clock::now();
-        tlib::ttm(
-            pp, sp, fp,
+        ttm(pp, sp, fp,
             q, A.order(),
             A.data().data(), A.shape().data(), A.strides().data(), A.layout().data(),
             B.data().data(), B.shape().data(), B.layout().data(),
@@ -109,7 +110,7 @@ inline void measure(unsigned q,
     std::cout << "Time : " << avg_time_s << " [s]" << std::endl;
    std::cout << "Gflops : " << gflops << " [gflops]" << std::endl;
    std::cout << "Performance : " << gflops/avg_time_s << " [gflops/s]" << std::endl;
-    std::cout << "Performance : " << gflops/avg_time_s/tlib::detail::cores << " [gflops/s/core]" << std::endl;
+    std::cout << "Performance : " << gflops/avg_time_s/detail::cores << " [gflops/s/core]" << std::endl;
 }
 
 
@@ -122,7 +123,7 @@ int main(int argc, char* argv[])
 {
 
     using value = double;
-    using tensor = tlib::tensor<value>; // or std::array<value_t,N>
+    using tensor = tensor<value>; // or std::array<value_t,N>
     using shape = typename tensor::shape_t;
 
     assert(argc > 4);
@@ -159,9 +160,9 @@ int main(int argc, char* argv[])
     const auto pc = pa;
 
     // layout tuple for A and C
-    const auto pia = tlib::detail::generate_k_order_layout(pa,1ul);
-    const auto pib = tlib::detail::generate_k_order_layout(pb,1ul);
-    const auto pic = tlib::detail::generate_k_order_layout(pc,1ul);
+    const auto pia = detail::generate_k_order_layout(pa,1ul);
+    const auto pib = detail::generate_k_order_layout(pb,1ul);
+    const auto pic = detail::generate_k_order_layout(pc,1ul);
 
     auto A = tensor( na, pia );
     auto B = tensor( nb, pib );
@@ -172,37 +173,37 @@ int main(int argc, char* argv[])
 
     if(method == 1 || method == 7){
         std::cout << "Algorithm: <par-loop | slice-2d, all>" << std::endl;
-        measure(q, A, B, C, tlib::parallel_policy::parallel_loop, tlib::slicing_policy::slice, tlib::fusion_policy::all );
+        measure(q, A, B, C, parallel_policy::parallel_loop, slicing_policy::slice, fusion_policy::all );
         std::cout << "---------" << std::endl << std::endl;
     }
 
     if(method == 2 || method == 7){
         std::cout << "Algorithm: <par-loop | subtensor, all>" << std::endl;
-        measure(q, A, B, C, tlib::parallel_policy::parallel_loop, tlib::slicing_policy::subtensor, tlib::fusion_policy::all );
+        measure(q, A, B, C, parallel_policy::parallel_loop, slicing_policy::subtensor, fusion_policy::all );
         std::cout << "---------" << std::endl << std::endl;
     }
 
     if(method == 3 || method == 7){
         std::cout << "Algorithm: <par-gemm | slice-2d, none>" << std::endl;
-        measure(q, A, B, C, tlib::parallel_policy::parallel_blas, tlib::slicing_policy::slice, tlib::fusion_policy::none );
+        measure(q, A, B, C, parallel_policy::parallel_blas, slicing_policy::slice, fusion_policy::none );
         std::cout << "---------" << std::endl << std::endl;
     }
 
     if(method == 4 || method == 7){
         std::cout << "Algorithm: <par-gemm | slice-2d, all>" << std::endl;
-        measure(q, A, B, C, tlib::parallel_policy::parallel_blas, tlib::slicing_policy::slice, tlib::fusion_policy::all );
+        measure(q, A, B, C, parallel_policy::parallel_blas, slicing_policy::slice, fusion_policy::all );
         std::cout << "---------" << std::endl << std::endl;
     }
 
     if(method == 5 || method == 7){
         std::cout << "Algorithm: <par-gemm | subtensor, none>" << std::endl;
-        measure(q, A, B, C, tlib::parallel_policy::parallel_blas, tlib::slicing_policy::subtensor, tlib::fusion_policy::none );
+        measure(q, A, B, C, parallel_policy::parallel_blas, slicing_policy::subtensor, fusion_policy::none );
        std::cout << "---------" << std::endl << std::endl;
     }
 
     if(method == 6 || method == 7){
         std::cout << "Algorithm: <par-gemm | slice-qd, all>" << std::endl;
-        measure(q, A, B, C, tlib::parallel_policy::parallel_blas, tlib::slicing_policy::subtensor, tlib::fusion_policy::all );
+        measure(q, A, B, C, parallel_policy::parallel_blas, slicing_policy::subtensor, fusion_policy::all );
         std::cout << "---------" << std::endl << std::endl;
     }
 
22 changes: 3 additions & 19 deletions include/tlib/detail/cases.h
@@ -19,13 +19,13 @@
 
 #include <stdexcept>
 
-namespace tlib::detail{
+namespace tlib::ttm::detail{
 
 
 template<unsigned case_nr>
 inline constexpr bool is_case(unsigned p, std::size_t q, std::size_t const*const pi)
 {
-    static_assert(case_nr > 0u || case_nr < 9u, "tlib::detail::is_case: only 8 cases from 1 to 8 are covered.");
+    static_assert(case_nr > 0u || case_nr < 9u, "tlib::ttm::detail::is_case: only 8 cases from 1 to 8 are covered.");
     if constexpr (case_nr == 1u) return p==1u;
     if constexpr (case_nr == 2u) return p==2u && q == 1u && pi[0] == 1u;
     if constexpr (case_nr == 3u) return p==2u && q == 2u && pi[0] == 1u;
@@ -36,20 +36,4 @@ inline constexpr bool is_case(unsigned p, std::size_t q, std::size_t const*const
     if constexpr (case_nr == 8u) return p>=3u && !(is_case<6u>(p,q,pi)||is_case<7u>(p,q,pi));
 }
 
-
-//// assume that the input matrix (2nd argument) with a column-major format
-//template<unsigned case_nr>
-//inline constexpr bool is_case(unsigned p, std::size_t q, std::size_t const*const pi)
-//{
-//    static_assert(case_nr > 0u || case_nr < 9u, "tlib::detail::is_case: only 8 cases from 1 to 8 are covered.");
-//    if constexpr (case_nr == 1u) return p==1u;
-//    if constexpr (case_nr == 2u) return p==2u && q == 1u && pi[0] == 1u;
-//    if constexpr (case_nr == 3u) return p==2u && q == 2u && pi[0] == 1u;
-//    if constexpr (case_nr == 4u) return p==2u && q == 1u && pi[0] == 2u;
-//    if constexpr (case_nr == 5u) return p==2u && q == 2u && pi[0] == 2u;
-//    if constexpr (case_nr == 6u) return p>=3u && pi[0] == q;
-//    if constexpr (case_nr == 7u) return p>=3u && pi[p-1] == q;
-//    if constexpr (case_nr == 8u) return p>=3u && !(is_case<6u>(p,q,pi)||is_case<7u>(p,q,pi));
-//}
-
-} // namespace tlib::detail
+} // namespace tlib::ttm::detail
4 changes: 2 additions & 2 deletions include/tlib/detail/index.h
@@ -17,7 +17,7 @@
 
 #pragma once
 
-namespace tlib::detail
+namespace tlib::ttm::detail
 {
 
 
@@ -126,4 +126,4 @@ constexpr auto at_at_1(size_type const j_view, container_type const& w_view, con
 
 
 
-} // namespace detail
+} // namespace tlib::ttm::detail
12 changes: 6 additions & 6 deletions include/tlib/detail/layout.h
@@ -23,7 +23,7 @@
 
 
 
-namespace tlib::detail
+namespace tlib::ttm::detail
 {
 
 template<class InputIt>
@@ -60,7 +60,7 @@ inline void compute_k_order_layout(OutputIt begin, OutputIt end, size_t k)
     auto const n_signed = std::distance(begin,end);
 
     if(n_signed <= 0)
-        throw std::runtime_error("Error in tlib::detail::compute_k_order: range provided by begin and end not correct!");
+        throw std::runtime_error("Error in tlib::ttm::detail::compute_k_order: range provided by begin and end not correct!");
 
     auto const n = static_cast<std::make_unsigned_t<decltype(n_signed)>>(n_signed);
     assert(n > 0);
@@ -122,16 +122,16 @@ inline auto inverse_mode(InputIt layout_begin, InputIt layout_end, SizeType mode
 {
     using value_type = typename std::iterator_traits<InputIt>::value_type;
     if(!is_valid_layout(layout_begin,layout_end))
-        throw std::runtime_error("Error in tlib::detail::inverse_mode(): input layout is not valid.");
+        throw std::runtime_error("Error in tlib::ttm::detail::inverse_mode(): input layout is not valid.");
 
     auto const p_ = std::distance(layout_begin,layout_end);
     if(p_<= 0)
-        throw std::runtime_error("Error in tlib::detail::inverse_mode(): input layout is invalid.");
+        throw std::runtime_error("Error in tlib::ttm::detail::inverse_mode(): input layout is invalid.");
 
     auto const p = static_cast<value_type>(p_);
 
     if(mode==0u || mode > SizeType(p))
-        throw std::runtime_error("Error in tlib::detail::inverse_mode(): mode should be one-based and equal to or less than layout size.");
+        throw std::runtime_error("Error in tlib::ttm::detail::inverse_mode(): mode should be one-based and equal to or less than layout size.");
 
     auto inverse_mode = value_type{0u};
     for(; inverse_mode < p; ++inverse_mode)
@@ -146,4 +146,4 @@ inline auto inverse_mode(InputIt layout_begin, InputIt layout_end, SizeType mode
 
 
 
-} // namespace tlib::detail
+} // namespace tlib::ttm::detail
4 changes: 2 additions & 2 deletions include/tlib/detail/mtm.h
@@ -46,7 +46,7 @@
 
 
 
-namespace tlib::detail {
+namespace tlib::ttm::detail {
 
 struct cblas_layout {};
 
@@ -223,4 +223,4 @@ inline void mtm_cm(unsigned const q, unsigned const p,
 }
 
 
-} // namespace tlib::detail
+} // namespace tlib::ttm::detail
4 changes: 2 additions & 2 deletions include/tlib/detail/shape.h
@@ -25,7 +25,7 @@
 #include <vector>
 
 
-namespace tlib::detail
+namespace tlib::ttm::detail
 {
 
 template<class InputIt>
@@ -93,4 +93,4 @@ inline bool is_tensor(InputIt begin, InputIt end)
 
 
 
-} // namespace tlib::detail
+} // namespace tlib::ttm::detail
4 changes: 2 additions & 2 deletions include/tlib/detail/strides.h
@@ -25,7 +25,7 @@
 #include "layout.h"
 
 
-namespace tlib::detail
+namespace tlib::ttm::detail
 {
 
 template<class InputIt1, class InputIt2, class OutputIt>
@@ -100,4 +100,4 @@ inline bool is_valid_strides(InputIt1 layout_begin, InputIt1 layout_end, InputIt
 //    [stride_begin]( auto l ) {return stride_begin[l-2] > stride_begin[l-1];} );
 }
 
-} // namespace tlib::detail
+} // namespace tlib::ttm::detail