From 4d5a5387d7c5f4a26b0f919087cd1fc297dfac40 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Mon, 3 Jul 2023 17:03:42 +0200 Subject: [PATCH 01/47] added som primitive implementations for fpga --- primitive_data/primitives/binary.yaml | 2 +- primitive_data/primitives/calc.yaml | 41 +++++++- primitive_data/primitives/compare.yaml | 131 ++++++++++++++++++++++++- primitive_data/primitives/ls.yaml | 24 +++++ 4 files changed, 191 insertions(+), 7 deletions(-) diff --git a/primitive_data/primitives/binary.yaml b/primitive_data/primitives/binary.yaml index 53a7bb34..8168fe64 100644 --- a/primitive_data/primitives/binary.yaml +++ b/primitive_data/primitives/binary.yaml @@ -1004,7 +1004,7 @@ definitions: T result{}; #pragma unroll for(size_t i = 0; i < Vec::vector_element_count(); ++i) { - result[i] = data[i] >> shift[i]; + result[i] = data[i] >> shift[i]; } return result; testing: diff --git a/primitive_data/primitives/calc.yaml b/primitive_data/primitives/calc.yaml index 5aa8dc69..9efef1c4 100644 --- a/primitive_data/primitives/calc.yaml +++ b/primitive_data/primitives/calc.yaml @@ -495,6 +495,19 @@ definitions: ctype: [ "uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double" ] lscpu_flags: [ ] implementation: "return vec_a * vec_b;" +# FPGA + - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t", "float", "double"] + lscpu_flags: ["oneAPIfpgaDev"] + vector_length_agnostic: True + implementation: | + using T = typename Vec::register_type; + T result; //initialize the result + #pragma unroll + for(int i = 0; i < Vec::vector_element_count(); ++i) { + result[i] = vec_a[i] * vec_b[i]; + } + return result; ... --- primitive_name: "hadd" @@ -820,7 +833,19 @@ definitions: implementation: | if (vec_a > vec_b) return vec_b; return vec_a; -#INTEL - FPGA +# FPGA + - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t", "float", "double"] + lscpu_flags: ["oneAPIfpgaDev"] + vector_length_agnostic: True + implementation: | + using T = typename Vec::register_type; + T result; //initialize the result + #pragma unroll + for(int i = 0; i < Vec::vector_element_count(); ++i) { + result[i] = vec_a[i] < vec_b[i] ? vec_a[i] : vec_b[i]; + } + return result; ... --- primitive_name: "div" @@ -981,7 +1006,19 @@ definitions: ctype: [ "uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double" ] lscpu_flags: [ ] implementation: return vec_a / vec_b; -#INTEL - FPGA +# FPGA + - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t", "float", "double"] + lscpu_flags: ["oneAPIfpgaDev"] + vector_length_agnostic: True + implementation: | + using T = typename Vec::register_type; + T result; //initialize the result + #pragma unroll + for(int i = 0; i < Vec::vector_element_count(); ++i) { + result[i] = vec_a[i] / vec_b[i]; + } + return result; ... --- primitive_name: 'mod' diff --git a/primitive_data/primitives/compare.yaml b/primitive_data/primitives/compare.yaml index 12cc839d..de183528 100644 --- a/primitive_data/primitives/compare.yaml +++ b/primitive_data/primitives/compare.yaml @@ -130,6 +130,26 @@ definitions: ctype: ["int8_t", "uint8_t", "int16_t", "uint16_t", "int32_t", "uint32_t", "int64_t", "uint64_t", "float", "double"] lscpu_flags: [] implementation: "return (vec_a == vec_b);" +#INTEL - FPGA + - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t", "float", "double"] + lscpu_flags: ["oneAPIfpgaDev"] + vector_length_agnostic: True + implementation: | + using T = typename Vec::register_type; + T result{}; + // Create a value with all bits set to 1, regardless of underlying type + {% if ctype in ["float", "double"] %} + memset((void*)&checker, 0xff, sizeof(checker)); + {% else %} + typename Vec::base_type checker = ~0; + {% endif %} + // + #pragma unroll + for(size_t i = 0; i < Vec::vector_element_count(); ++i) { + result[i] = (vec_a[i] == vec_b[i]) ? checker : 0; + } + return result; ... --- primitive_name: "equal" @@ -268,6 +288,26 @@ definitions: ctype: ["int8_t", "uint8_t", "int16_t", "uint16_t", "int32_t", "uint32_t", "int64_t", "uint64_t", "float", "double"] lscpu_flags: [] implementation: "return (vec_a != vec_b);" + #INTEL - FPGA + - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t", "float", "double"] + lscpu_flags: ["oneAPIfpgaDev"] + vector_length_agnostic: True + implementation: | + using T = typename Vec::register_type; + T result{}; + // Create a value with all bits set to 1, regardless of underlying type + {% if ctype in ["float", "double"] %} + memset((void*)&checker, 0xff, sizeof(checker)); + {% else %} + typename Vec::base_type checker = ~0; + {% endif %} + // + #pragma unroll + for(size_t i = 0; i < Vec::vector_element_count(); ++i) { + result[i] = (vec_a[i] == vec_b[i]) ? 0 : checker; + } + return result; ... --- primitive_name: "between_inclusive" @@ -327,19 +367,22 @@ definitions: implementation: "return vandq_u64( vcgeq_s64( vec_data, vec_min ), vcleq_s64( vec_data, vec_max ) );" #INTEL - FPGA - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] - ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "float"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t", "float", "double"] lscpu_flags: ["oneAPIfpgaDev"] vector_length_agnostic: True implementation: | using T = typename Vec::register_type; T result{}; // Create a value with all bits set to 1, regardless of underlying type - typename Vec::base_type checker; + {% if ctype in ["float", "double"] %} memset((void*)&checker, 0xff, sizeof(checker)); - + {% else %} + typename Vec::base_type checker = ~0; + {% endif %} + // #pragma unroll for(size_t i = 0; i < Vec::vector_element_count(); ++i) { - result[i] = ((vec_data[i] >= vec_min[i]) && (vec_data[i] <= vec_max[i])) ? checker : 0; + result[i] = ((vec_data[i] >= vec_min[i]) && (vec_data[i] <= vec_max[i])) ? checker : 0; } return result; ... @@ -466,6 +509,26 @@ definitions: ctype: ["int8_t", "uint8_t", "int16_t", "uint16_t", "int32_t", "uint32_t", "int64_t", "uint64_t", "float", "double"] lscpu_flags: [] implementation: "return (vec_a < vec_b);" +#INTEL - FPGA + - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t", "float", "double"] + lscpu_flags: ["oneAPIfpgaDev"] + vector_length_agnostic: True + implementation: | + using T = typename Vec::register_type; + T result{}; + // Create a value with all bits set to 1, regardless of underlying type + {% if ctype in ["float", "double"] %} + memset((void*)&checker, 0xff, sizeof(checker)); + {% else %} + typename Vec::base_type checker = ~0; + {% endif %} + // + #pragma unroll + for(size_t i = 0; i < Vec::vector_element_count(); ++i) { + result[i] = (vec_a[i] < vec_b[i]) ? checker : 0; + } + return result; ... --- primitive_name: "greater_than" @@ -587,6 +650,26 @@ definitions: ctype: ["int8_t", "uint8_t", "int16_t", "uint16_t", "int32_t", "uint32_t", "int64_t", "uint64_t", "float", "double"] lscpu_flags: [] implementation: "return (vec_a > vec_b);" +#INTEL - FPGA + - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t", "float", "double"] + lscpu_flags: ["oneAPIfpgaDev"] + vector_length_agnostic: True + implementation: | + using T = typename Vec::register_type; + T result{}; + // Create a value with all bits set to 1, regardless of underlying type + {% if ctype in ["float", "double"] %} + memset((void*)&checker, 0xff, sizeof(checker)); + {% else %} + typename Vec::base_type checker = ~0; + {% endif %} + // + #pragma unroll + for(size_t i = 0; i < Vec::vector_element_count(); ++i) { + result[i] = (vec_a[i] > vec_b[i]) ? checker : 0; + } + return result; ... --- primitive_name: "less_than_or_equal" @@ -719,6 +802,26 @@ definitions: ctype: ["int8_t", "uint8_t", "int16_t", "uint16_t", "int32_t", "uint32_t", "int64_t", "uint64_t", "float", "double"] lscpu_flags: [] implementation: "return (vec_a <= vec_b);" +#INTEL - FPGA + - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t", "float", "double"] + lscpu_flags: ["oneAPIfpgaDev"] + vector_length_agnostic: True + implementation: | + using T = typename Vec::register_type; + T result{}; + // Create a value with all bits set to 1, regardless of underlying type + {% if ctype in ["float", "double"] %} + memset((void*)&checker, 0xff, sizeof(checker)); + {% else %} + typename Vec::base_type checker = ~0; + {% endif %} + // + #pragma unroll + for(size_t i = 0; i < Vec::vector_element_count(); ++i) { + result[i] = (vec_a[i] <= vec_b[i]) ? checker : 0; + } + return result; ... --- primitive_name: "greater_than_or_equal" @@ -852,6 +955,26 @@ definitions: ctype: ["int8_t", "uint8_t", "int16_t", "uint16_t", "int32_t", "uint32_t", "int64_t", "uint64_t", "float", "double"] lscpu_flags: [] implementation: "return (vec_a >= vec_b);" +#INTEL - FPGA + - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t", "float", "double"] + lscpu_flags: ["oneAPIfpgaDev"] + vector_length_agnostic: True + implementation: | + using T = typename Vec::register_type; + T result{}; + // Create a value with all bits set to 1, regardless of underlying type + {% if ctype in ["float", "double"] %} + memset((void*)&checker, 0xff, sizeof(checker)); + {% else %} + typename Vec::base_type checker = ~0; + {% endif %} + // + #pragma unroll + for(size_t i = 0; i < Vec::vector_element_count(); ++i) { + result[i] = (vec_a[i] >= vec_b[i]) ? checker : 0; + } + return result; ... --- primitive_name: "unequal_zero" diff --git a/primitive_data/primitives/ls.yaml b/primitive_data/primitives/ls.yaml index 81223c95..2839c15e 100644 --- a/primitive_data/primitives/ls.yaml +++ b/primitive_data/primitives/ls.yaml @@ -253,6 +253,16 @@ definitions: ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double"] lscpu_flags: [] implementation: "*memory = data;" +#FPGA + - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "float", "uint64_t", "int64_t", "double"] + lscpu_flags: ["oneAPIfpgaDev"] + vector_length_agnostic: True + implementation: | + #pragma unroll + for (size_t idx = 0; idx < Vec::vector_element_count(); idx++) { + memory[idx] = data[idx]; + } ... --- primitive_name: "storeu" @@ -504,6 +514,19 @@ definitions: ctype: ["float", "double"] lscpu_flags: ["sse2"] implementation: "return _mm_setzero_{{ intrin_tp_full[ctype] }}();" +#FPGA + - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "float", "uint64_t", "int64_t", "double"] + lscpu_flags: ["oneAPIfpgaDev"] + vector_length_agnostic: True + implementation: | + using T = typename Vec::register_type; + T result; //initialize the result + #pragma unroll + for(int i = 0; i < Vec::vector_element_count(); ++i) { + result[i] = 0; + } + return result; --- primitive_name: "set" brief_description: "Transfers provided elements into a vector register." @@ -838,6 +861,7 @@ definitions: ctype: [ "uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double" ] lscpu_flags: [ ] implementation: "return *reinterpret_cast(reinterpret_cast(memory) + index * N);" + ... --- primitive_name: "gather" From f8a80a29df1551360d5a96537ea5fad240804315 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 4 Jul 2023 14:52:22 +0200 Subject: [PATCH 02/47] added lzc_scalar and fixed error in inverse --- primitive_data/primitives/binary.yaml | 291 +++++++++++++++++++++++++- 1 file changed, 287 insertions(+), 4 deletions(-) diff --git a/primitive_data/primitives/binary.yaml b/primitive_data/primitives/binary.yaml index 8168fe64..b15a087a 100644 --- a/primitive_data/primitives/binary.yaml +++ b/primitive_data/primitives/binary.yaml @@ -1004,7 +1004,7 @@ definitions: T result{}; #pragma unroll for(size_t i = 0; i < Vec::vector_element_count(); ++i) { - result[i] = data[i] >> shift[i]; + result[i] = data[i] >> shift[i]; } return result; testing: @@ -1068,8 +1068,8 @@ parameters: name: "data" description: "First vector." returns: - ctype: "typename Vec::offset_base_register_type" - description: "Vector containing leading zeros number." + ctype: "typename Vec::offset_base_register_type" + description: "Vector containing leading zeros number." definitions: - target_extension: ["oneAPIfpgaRTL"] ctype: ["uint32_t", "int32_t", "float"] @@ -1113,6 +1113,289 @@ definitions: return result; ... --- +primitive_name: "lzc" +functor_name: "lzc_scalar" +parameters: + - ctype: "const typename Vec::base_type" + name: "data" + description: "Value." +returns: + ctype: "typename Vec::offset_base_type" + description: "Number of leading zeros." +testing: + - test_name: "test_zero" + includes: ["", "", ""] + implementation: | + using T = typename Vec::base_type; + T data = 0; + std::cout << "test_zero" << std::endl; + std::cout << lzc(data) << std::endl; + return lzc(data) == (sizeof(T)*CHAR_BIT); + # - test_name: "test_one" + # includes: ["", ""] + # implementation: | + # using T = typename Vec::base_type; + # T data = 0; + # return lzc(data) == (sizeof(T)*CHAR_BIT-1); + # - test_name: "test_msb" + # includes: ["", ""] + # implementation: | + # using T = typename Vec::base_type; + # T data = -1; + # return lzc(data) == 0; +definitions: + - target_extension: ["avx512"] + ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t"] + lscpu_flags: ["avx512f"] + implementation: | + using T = typename Vec::base_type; + static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); + if constexpr(sizeof(T) <= sizeof(unsigned int)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } + - target_extension: ["avx512"] + ctype: ["int8_t", "int16_t", "int32_t", "int64_t"] + lscpu_flags: ["avx512f"] + implementation: | + using T = typename Vec::base_type; + static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); + if constexpr(sizeof(T) <= sizeof(unsigned int)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } + - target_extension: ["avx2"] + ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t"] + lscpu_flags: ["avx"] + implementation: | + using T = typename Vec::base_type; + static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); + if constexpr(sizeof(T) <= sizeof(unsigned int)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } + - target_extension: ["avx2"] + ctype: ["int8_t", "int16_t", "int32_t", "int64_t"] + lscpu_flags: ["avx"] + implementation: | + using T = typename Vec::base_type; + static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); + if constexpr(sizeof(T) <= sizeof(unsigned int)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } + - target_extension: ["sse"] + ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t"] + lscpu_flags: ["sse"] + implementation: | + using T = typename Vec::base_type; + static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); + if constexpr(sizeof(T) <= sizeof(unsigned int)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } + - target_extension: ["sse"] + ctype: ["int8_t", "int16_t", "int32_t", "int64_t"] + lscpu_flags: ["sse"] + implementation: | + using T = typename Vec::base_type; + static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); + if constexpr(sizeof(T) <= sizeof(unsigned int)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } + - target_extension: ["neon"] + ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t"] + lscpu_flags: ["neon"] + implementation: | + using T = typename Vec::base_type; + static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); + if constexpr(sizeof(T) <= sizeof(unsigned int)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } + - target_extension: ["neon"] + ctype: ["int8_t", "int16_t", "int32_t", "int64_t"] + lscpu_flags: ["neon"] + implementation: | + using T = typename Vec::base_type; + static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); + if constexpr(sizeof(T) <= sizeof(unsigned int)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } + - target_extension: ["scalar"] + ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t"] + lscpu_flags: [] + implementation: | + using T = typename Vec::base_type; + static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); + if constexpr(sizeof(T) <= sizeof(unsigned int)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } + - target_extension: ["scalar"] + ctype: ["int8_t", "int16_t", "int32_t", "int64_t"] + lscpu_flags: [] + implementation: | + using T = typename Vec::base_type; + static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); + if constexpr(sizeof(T) <= sizeof(unsigned int)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + #if __cplusplus >= 202002L + return std::countl_zero(value); + #else + return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + #endif + } +... +--- primitive_name: "hor" brief_description: "Operates horizontal OR on vector register" parameters: @@ -1543,7 +1826,7 @@ definitions: T result{}; #pragma unroll for(size_t i = 0; i < Vec::vector_element_count(); ++i) { - result[i] = ~data[i]; + result[i] = ~vec[i]; } return result; ... From e995aea2902a1050268f2493e34059dfe3d91201 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 4 Jul 2023 14:59:40 +0200 Subject: [PATCH 03/47] Added include if cxx20 is active --- generator/static_files/core/utils/functional.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/generator/static_files/core/utils/functional.yaml b/generator/static_files/core/utils/functional.yaml index ea79a419..a9ff4b22 100644 --- a/generator/static_files/core/utils/functional.yaml +++ b/generator/static_files/core/utils/functional.yaml @@ -12,6 +12,10 @@ includes: - '' - '' implementations: + - | + #if __cplusplus >= 202002L + #include + #endif - | namespace reducer { template From ef4a177ba23d90508a58edd094413fc7cc6908e7 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 4 Jul 2023 15:33:47 +0200 Subject: [PATCH 04/47] Refactored lzc --- .../static_files/core/utils/functional.yaml | 33 ++- primitive_data/primitives/binary.yaml | 245 +----------------- 2 files changed, 42 insertions(+), 236 deletions(-) diff --git a/generator/static_files/core/utils/functional.yaml b/generator/static_files/core/utils/functional.yaml index a9ff4b22..e89de012 100644 --- a/generator/static_files/core/utils/functional.yaml +++ b/generator/static_files/core/utils/functional.yaml @@ -11,10 +11,41 @@ includes: - '' - '' - '' + - '' implementations: - | - #if __cplusplus >= 202002L + #if ((__cplusplus >= 202002L) && (__has_include())) #include + namespace details { + template + constexpr int clz(T data) { + return std::countl_zero(data); + } + } + #else + namespace details { + template + auto clz(T data) { + static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); + if constexpr(std::is_unsigned_v) { + if constexpr(sizeof(T) <= sizeof(unsigned int)) { + return data == 0 ? std::numeric_limits::digits : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + return data == 0 ? std::numeric_limits::digits : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } + } else { + if constexpr(sizeof(T) <= sizeof(unsigned int)) { + return data == 0 ? std::numeric_limits::digits + 1 : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + return data == 0 ? std::numeric_limits::digits + 1 : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + return data == 0 ? std::numeric_limits::digits + 1: __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } + } + } + } #endif - | namespace reducer { diff --git a/primitive_data/primitives/binary.yaml b/primitive_data/primitives/binary.yaml index b15a087a..c42dd29d 100644 --- a/primitive_data/primitives/binary.yaml +++ b/primitive_data/primitives/binary.yaml @@ -1145,255 +1145,30 @@ testing: # return lzc(data) == 0; definitions: - target_extension: ["avx512"] - ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t"] - lscpu_flags: ["avx512f"] - implementation: | - using T = typename Vec::base_type; - static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); - if constexpr(sizeof(T) <= sizeof(unsigned int)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } - - target_extension: ["avx512"] - ctype: ["int8_t", "int16_t", "int32_t", "int64_t"] + ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t"] lscpu_flags: ["avx512f"] implementation: | - using T = typename Vec::base_type; - static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); - if constexpr(sizeof(T) <= sizeof(unsigned int)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } - - target_extension: ["avx2"] - ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t"] - lscpu_flags: ["avx"] - implementation: | - using T = typename Vec::base_type; - static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); - if constexpr(sizeof(T) <= sizeof(unsigned int)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } + return details::clz(data); - target_extension: ["avx2"] - ctype: ["int8_t", "int16_t", "int32_t", "int64_t"] + ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t"] lscpu_flags: ["avx"] implementation: | - using T = typename Vec::base_type; - static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); - if constexpr(sizeof(T) <= sizeof(unsigned int)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } - - target_extension: ["sse"] - ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t"] - lscpu_flags: ["sse"] - implementation: | - using T = typename Vec::base_type; - static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); - if constexpr(sizeof(T) <= sizeof(unsigned int)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } + return details::clz(data); - target_extension: ["sse"] - ctype: ["int8_t", "int16_t", "int32_t", "int64_t"] + ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t"] lscpu_flags: ["sse"] implementation: | - using T = typename Vec::base_type; - static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); - if constexpr(sizeof(T) <= sizeof(unsigned int)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } - - target_extension: ["neon"] - ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t"] - lscpu_flags: ["neon"] - implementation: | - using T = typename Vec::base_type; - static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); - if constexpr(sizeof(T) <= sizeof(unsigned int)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } + return details::clz(data); - target_extension: ["neon"] - ctype: ["int8_t", "int16_t", "int32_t", "int64_t"] + ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t"] lscpu_flags: ["neon"] implementation: | - using T = typename Vec::base_type; - static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); - if constexpr(sizeof(T) <= sizeof(unsigned int)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } - - target_extension: ["scalar"] - ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t"] - lscpu_flags: [] - implementation: | - using T = typename Vec::base_type; - static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); - if constexpr(sizeof(T) <= sizeof(unsigned int)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } + return details::clz(data); - target_extension: ["scalar"] - ctype: ["int8_t", "int16_t", "int32_t", "int64_t"] + ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t"] lscpu_flags: [] implementation: | - using T = typename Vec::base_type; - static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); - if constexpr(sizeof(T) <= sizeof(unsigned int)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { - #if __cplusplus >= 202002L - return std::countl_zero(value); - #else - return data == 0 ? std::numeric_limits::digits+1 : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - #endif - } + return details::clz(data); ... --- primitive_name: "hor" From e15618434bd022aafad4b6b200d0810bffa73854 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 4 Jul 2023 17:22:12 +0200 Subject: [PATCH 05/47] Fixed bugs in scalar lzc --- .../static_files/core/utils/functional.yaml | 19 +++++----- primitive_data/primitives/binary.yaml | 36 +++++++++---------- 2 files changed, 27 insertions(+), 28 deletions(-) diff --git a/generator/static_files/core/utils/functional.yaml b/generator/static_files/core/utils/functional.yaml index e89de012..0a4ab56f 100644 --- a/generator/static_files/core/utils/functional.yaml +++ b/generator/static_files/core/utils/functional.yaml @@ -17,15 +17,15 @@ implementations: #if ((__cplusplus >= 202002L) && (__has_include())) #include namespace details { - template - constexpr int clz(T data) { - return std::countl_zero(data); + template + constexpr RetT clz(T data) { + return std::countl_zero>(data); } } #else namespace details { - template - auto clz(T data) { + template + RetT clz(T data) { static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); if constexpr(std::is_unsigned_v) { if constexpr(sizeof(T) <= sizeof(unsigned int)) { @@ -36,12 +36,13 @@ implementations: return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); } } else { - if constexpr(sizeof(T) <= sizeof(unsigned int)) { - return data == 0 ? std::numeric_limits::digits + 1 : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + using UT = std::make_unsigned_t; + if constexpr(sizeof(UT) <= sizeof(unsigned int)) { + return data == 0 ? std::numeric_limits::digits : __builtin_clz(static_cast(data)) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { - return data == 0 ? std::numeric_limits::digits + 1 : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + return data == 0 ? std::numeric_limits::digits : __builtin_clzl(static_cast(data)) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { - return data == 0 ? std::numeric_limits::digits + 1: __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + return data == 0 ? std::numeric_limits::digits : __builtin_clzll(static_cast(data)) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); } } } diff --git a/primitive_data/primitives/binary.yaml b/primitive_data/primitives/binary.yaml index c42dd29d..d4cef4e1 100644 --- a/primitive_data/primitives/binary.yaml +++ b/primitive_data/primitives/binary.yaml @@ -1128,47 +1128,45 @@ testing: implementation: | using T = typename Vec::base_type; T data = 0; - std::cout << "test_zero" << std::endl; - std::cout << lzc(data) << std::endl; return lzc(data) == (sizeof(T)*CHAR_BIT); - # - test_name: "test_one" - # includes: ["", ""] - # implementation: | - # using T = typename Vec::base_type; - # T data = 0; - # return lzc(data) == (sizeof(T)*CHAR_BIT-1); - # - test_name: "test_msb" - # includes: ["", ""] - # implementation: | - # using T = typename Vec::base_type; - # T data = -1; - # return lzc(data) == 0; + - test_name: "test_one" + includes: ["", ""] + implementation: | + using T = typename Vec::base_type; + T data = 1; + return lzc(data) == (sizeof(T)*CHAR_BIT-1); + - test_name: "test_msb" + includes: ["", ""] + implementation: | + using T = typename Vec::base_type; + T data = (T)1 << ((T)sizeof(T)*CHAR_BIT-1); + return lzc(data) == 0; definitions: - target_extension: ["avx512"] ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t"] lscpu_flags: ["avx512f"] implementation: | - return details::clz(data); + return details::clz(data); - target_extension: ["avx2"] ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t"] lscpu_flags: ["avx"] implementation: | - return details::clz(data); + return details::clz(data); - target_extension: ["sse"] ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t"] lscpu_flags: ["sse"] implementation: | - return details::clz(data); + return details::clz(data); - target_extension: ["neon"] ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t"] lscpu_flags: ["neon"] implementation: | - return details::clz(data); + return details::clz(data); - target_extension: ["scalar"] ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t"] lscpu_flags: [] implementation: | - return details::clz(data); + return details::clz(data); ... --- primitive_name: "hor" From 4134cfee198eb61fde1de87f6be79dfdadbfd9a3 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Thu, 6 Jul 2023 09:40:00 +0200 Subject: [PATCH 06/47] fixed missing variable declaration --- primitive_data/primitives/compare.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/primitive_data/primitives/compare.yaml b/primitive_data/primitives/compare.yaml index de183528..dc3bf05a 100644 --- a/primitive_data/primitives/compare.yaml +++ b/primitive_data/primitives/compare.yaml @@ -140,6 +140,7 @@ definitions: T result{}; // Create a value with all bits set to 1, regardless of underlying type {% if ctype in ["float", "double"] %} + typename Vec::base_type checker; memset((void*)&checker, 0xff, sizeof(checker)); {% else %} typename Vec::base_type checker = ~0; @@ -298,6 +299,7 @@ definitions: T result{}; // Create a value with all bits set to 1, regardless of underlying type {% if ctype in ["float", "double"] %} + typename Vec::base_type checker; memset((void*)&checker, 0xff, sizeof(checker)); {% else %} typename Vec::base_type checker = ~0; @@ -375,6 +377,7 @@ definitions: T result{}; // Create a value with all bits set to 1, regardless of underlying type {% if ctype in ["float", "double"] %} + typename Vec::base_type checker; memset((void*)&checker, 0xff, sizeof(checker)); {% else %} typename Vec::base_type checker = ~0; @@ -519,6 +522,7 @@ definitions: T result{}; // Create a value with all bits set to 1, regardless of underlying type {% if ctype in ["float", "double"] %} + typename Vec::base_type checker; memset((void*)&checker, 0xff, sizeof(checker)); {% else %} typename Vec::base_type checker = ~0; @@ -660,6 +664,7 @@ definitions: T result{}; // Create a value with all bits set to 1, regardless of underlying type {% if ctype in ["float", "double"] %} + typename Vec::base_type checker; memset((void*)&checker, 0xff, sizeof(checker)); {% else %} typename Vec::base_type checker = ~0; @@ -812,6 +817,7 @@ definitions: T result{}; // Create a value with all bits set to 1, regardless of underlying type {% if ctype in ["float", "double"] %} + typename Vec::base_type checker; memset((void*)&checker, 0xff, sizeof(checker)); {% else %} typename Vec::base_type checker = ~0; @@ -965,6 +971,7 @@ definitions: T result{}; // Create a value with all bits set to 1, regardless of underlying type {% if ctype in ["float", "double"] %} + typename Vec::base_type checker; memset((void*)&checker, 0xff, sizeof(checker)); {% else %} typename Vec::base_type checker = ~0; From 57b44df685dcbe91ff9bb76831e0c75e03bcb713 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Fri, 14 Jul 2023 09:33:56 +0200 Subject: [PATCH 07/47] Added some primitives --- .../static_files/core/utils/functional.yaml | 37 +++++++++++ .../expansions/tests/test_functions.yaml | 6 +- primitive_data/primitives/binary.yaml | 7 ++ primitive_data/primitives/mask.yaml | 65 +++++++++++++++++++ 4 files changed, 114 insertions(+), 1 deletion(-) diff --git a/generator/static_files/core/utils/functional.yaml b/generator/static_files/core/utils/functional.yaml index 0a4ab56f..3a5dad3d 100644 --- a/generator/static_files/core/utils/functional.yaml +++ b/generator/static_files/core/utils/functional.yaml @@ -21,6 +21,42 @@ implementations: constexpr RetT clz(T data) { return std::countl_zero>(data); } + + template + struct clz_recursive; + template + struct clz_recursive { + __attribute__((always_inline)) inline static auto apply(uint8_t x) -> RetT { + static constexpr uint8_t clz_lookup[16] = {4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}; + auto upper = x >> 4; + auto lower = x & 0x0F; + return upper ? clz_lookup[upper] : 4 + clz_lookup[lower]; + } + }; + template + struct clz_recursive { + __attribute__((always_inline)) inline static auto apply(uint16_t x) -> RetT { + auto upper = uint8_t(x >> 8); + auto lower = uint8_t(x & 0xFF); + return upper ? clz_recursive::apply(upper) : 8 + clz_recursive::apply(lower); + } + }; + template + struct clz_recursive { + __attribute__((always_inline)) inline static auto apply(uint32_t x) -> RetT { + auto upper = uint16_t(x >> 16); + auto lower = uint16_t(x & 0xFFFF); + return upper ? clz_recursive::apply(upper) : 16 + clz_recursive::apply(lower); + } + }; + template + struct clz_recursive { + __attribute__((always_inline)) inline static auto apply(uint64_t x) -> RetT { + auto upper = uint32_t(x >> 32); + auto lower = uint32_t(x & 0xFFFFFFFF); + return upper ? clz_recursive::apply(upper) : 32 + clz_recursive::apply(lower); + } + }; } #else namespace details { @@ -47,6 +83,7 @@ implementations: } } } + #endif - | namespace reducer { diff --git a/generator/static_files/expansions/tests/test_functions.yaml b/generator/static_files/expansions/tests/test_functions.yaml index c8212107..e8ca78a0 100644 --- a/generator/static_files/expansions/tests/test_functions.yaml +++ b/generator/static_files/expansions/tests/test_functions.yaml @@ -173,7 +173,7 @@ implementations: m_result_target_for_ref{tsl::allocate>(p_result_count*sizeof(result_t))}, m_result_target {(aligned) ? tsl::allocate_aligned(p_result_count*sizeof(result_t), Vec::vector_alignment()) : tsl::allocate(p_result_count*sizeof(result_t))} { - + std::memset(reinterpret_cast(m_result_ref), 0, p_result_count*sizeof(result_t)); } test_memory_helper_t(std::size_t p_data_element_count, std::size_t p_result_count, bool aligned, std::function const& fun = rnd_init): m_data_element_count {p_data_element_count}, @@ -186,6 +186,7 @@ implementations: { fun(m_data_ref, p_data_element_count); tsl::memory_cp(m_data_target, m_data_ref, p_data_element_count*sizeof(base_t), 1); + std::memset(reinterpret_cast(m_result_ref), 0, p_result_count*sizeof(result_t)); } template test_memory_helper_t(std::size_t p_data_element_count, std::size_t p_result_count, bool aligned, void (*fun)(base_t*, std::size_t, Ts...), Ts... init_args): @@ -199,6 +200,7 @@ implementations: { fun(m_data_ref, p_data_element_count, init_args...); tsl::memory_cp(m_data_target, m_data_ref, p_data_element_count*sizeof(base_t), 1); + std::memset(reinterpret_cast(m_result_ref), 0, p_result_count*sizeof(result_t)); } template test_memory_helper_t(std::size_t p_data_element_count, std::size_t p_result_count, bool aligned, std::function const & fun, base_t start): @@ -212,6 +214,7 @@ implementations: { fun(m_data_ref, p_data_element_count, start); tsl::memory_cp(m_data_target, m_data_ref, p_data_element_count*sizeof(base_t), 1); + std::memset(reinterpret_cast(m_result_ref), 0, p_result_count*sizeof(result_t)); } virtual ~test_memory_helper_t() { tsl::deallocate(m_result_target); @@ -237,6 +240,7 @@ implementations: bool validate() const { bool result = true; for(auto i = 0; i < m_result_count; ++i) { + //std::cerr << m_result_ref[i] << " " << m_result_target_for_ref[i] << std::endl; result &= check_value(m_result_ref[i], m_result_target_for_ref[i]); } return result; diff --git a/primitive_data/primitives/binary.yaml b/primitive_data/primitives/binary.yaml index d4cef4e1..0a9ed8ea 100644 --- a/primitive_data/primitives/binary.yaml +++ b/primitive_data/primitives/binary.yaml @@ -1167,6 +1167,13 @@ definitions: lscpu_flags: [] implementation: | return details::clz(data); +#INTEL - FPGA + - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t"] + lscpu_flags: ["oneAPIfpgaDev"] + vector_length_agnostic: True + implementation: | + return details::clz_recursive::apply(data); ... --- primitive_name: "hor" diff --git a/primitive_data/primitives/mask.yaml b/primitive_data/primitives/mask.yaml index 29a45df8..65a4c066 100644 --- a/primitive_data/primitives/mask.yaml +++ b/primitive_data/primitives/mask.yaml @@ -56,6 +56,46 @@ testing: #optional allOk &= (matches == i); } return allOk; + - test_name: "mask_match_alternate" + requires: ["loadu", "set1", "storeu"] + includes: ["", "", "", ""] + implementation: | + using T = typename Vec::base_type; + bool allOk = true; + if constexpr (std::is_same_v) { + std::cerr << "implement me" << std::endl; + return true; + } else if constexpr (std::is_same_v) { + //we don't need to do anything here since it is a NOP. + return true; + } else { + //e.g., SSE, AVX2, ... + auto data_create_lambda = [](T* data, std::size_t element_count) { + for (size_t i = 0; i < element_count; ++i) { + data[i] = i % 2 == 0 ? -1 : 0; + } + }; + testing::test_memory_helper_t test_helper{Vec::vector_element_count(), Vec::vector_element_count(), false, data_create_lambda}; + tsl::storeu(test_helper.result_target(), tsl::set1(0)); + auto reference_result_ptr = reinterpret_cast(test_helper.result_ref()); + typename Vec::imask_type expected_result = 0; + for (size_t i = 0; i < Vec::vector_element_count(); ++i) { + expected_result |= i % 2 == 0 ? 0b1 << i : 0; + } + *reference_result_ptr = expected_result; + auto test_data_ptr = test_helper.data_target(); + auto test_result_ptr = reinterpret_cast(test_helper.result_target()); + auto vec_mask = loadu(test_data_ptr); + auto mask_integral = to_integral(vec_mask); + *test_result_ptr = mask_integral; + test_helper.synchronize(); + /*std::cerr << tsl::type_name() << std::endl; + std::cerr << "expected_result: " << std::bitset<64>{(unsigned long long)expected_result} << std::endl; + std::cerr << "test_result: " << std::bitset<64>{(unsigned long long)mask_integral} << std::endl; + std::cerr << "Same values? " << ((expected_result == mask_integral) ? "True" : "False")<< std::endl; + std::cerr << "Same values? " << ((test_helper.validate()) ? "True" : "False")<< std::endl;*/ + return test_helper.validate(); + } definitions: #INTEL - AVX512 - target_extension: "avx512" @@ -921,6 +961,31 @@ definitions: lscpu_flags: [] implementation: return false; ... +--- +primitive_name: "load_mask" +brief_description: "Loads data from memory to a mask." +parameters: + - ctype: "typename Vec::imask_type const*" + name: "memory" + description: "Memory where the mask data should be loaded from." +returns: + ctype: "typename Vec::mask_type" + name: "result" + description: "Mask containing the loaded data." +definitions: + - target_extension: "avx512" + ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double"] + lscpu_flags: ["avx512f"] + implementation: "return *memory;" + - target_extension: "avx2" + ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double"] + lscpu_flags: ["avx"] + implementation: "return tsl::to_mask(*memory);" + - target_extension: "sse" + ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double"] + lscpu_flags: ["sse"] + implementation: "return tsl::to_mask(*memory);" +... #--- #primitive_name: "mask_reduce" #brief_description: "Masks out every non relevant bit." From 935a9bc1dfad889bfd01fc02326141529e97ddaf Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Fri, 14 Jul 2023 16:10:08 +0200 Subject: [PATCH 08/47] Added Conflict detection for fpga --- .../extensions/simd/intel/fpga_generic.yaml | 2 +- primitive_data/primitives/misc.yaml | 10 ++++++++ .../include/tslOneAPIfpgaHelper.hpp | 24 +++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp diff --git a/primitive_data/extensions/simd/intel/fpga_generic.yaml b/primitive_data/extensions/simd/intel/fpga_generic.yaml index 7a371ac7..fe9880bf 100644 --- a/primitive_data/extensions/simd/intel/fpga_generic.yaml +++ b/primitive_data/extensions/simd/intel/fpga_generic.yaml @@ -3,7 +3,7 @@ description: "Definition of the SIMD TargetExtension fpga for Intel fpga using o vendor: "intel" extension_name: "oneAPIfpga" lscpu_flags: ["oneAPIfpgaDev"] -includes: ["", "", "", "", ""] +includes: ["", "", "", "", "", ""] simdT_name: "oneAPIfpga" needs_arch_flags: False simdT_default_size_in_bits: 512 #this is just a default value, we can let it out if we want to diff --git a/primitive_data/primitives/misc.yaml b/primitive_data/primitives/misc.yaml index 539b763b..8acf05e8 100644 --- a/primitive_data/primitives/misc.yaml +++ b/primitive_data/primitives/misc.yaml @@ -105,6 +105,16 @@ definitions: ctype: [ "uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t"] lscpu_flags: [] implementation: "return 0;" + #FPGA + - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] + ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "uint64_t", "int64_t"] + lscpu_flags: ["oneAPIfpgaDev"] + vector_length_agnostic: True + implementation: | + typename Vec::register_type result{}; + tsl::oneAPIfpga::details::conflictReduce(result, data, std::make_index_sequence{}); + result[0] = 0; + return result; ... --- primitive_name: "conflict_free" diff --git a/supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp b/supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp new file mode 100644 index 00000000..b1eeb175 --- /dev/null +++ b/supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp @@ -0,0 +1,24 @@ +#ifndef TSL_SUPPLEMENTARY_ONEAPIFPGA_TSLONEAPIFPGAHELPER_HPP +#define TSL_SUPPLEMENTARY_ONEAPIFPGA_TSLONEAPIFPGAHELPER_HPP + +#include +#include +#include +#include + + +namespace tsl { + namespace oneAPIfpga { + namespace details { + template + __attribute__((always_inline)) inline typename Vec::base_type conflictReduceImpl(typename Vec::register_type const & data, std::index_sequence) { + return ((typename Vec::base_type)0 | ... | (data[Upper+1] == data[Rest] ? ((typename Vec::base_type)1 << (typename Vec::base_type)Rest) : (typename Vec::base_type)0)); + } + template + __attribute__((always_inline)) inline void conflictReduce(typename Vec::register_type & result, typename Vec::register_type const & data, std::index_sequence) { + ((result[Is+1] = conflictReduceImpl(data, std::make_index_sequence{})), ...); + } + } + } +} +#endif //TSL_SUPPLEMENTARY_ONEAPIFPGA_TSLONEAPIFPGAHELPER_HPP \ No newline at end of file From 810d11283989179d733e9307ec25843ccc1f44ad Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Fri, 14 Jul 2023 16:29:13 +0200 Subject: [PATCH 09/47] Renamed namespace to avoid name clash --- primitive_data/primitives/misc.yaml | 2 +- supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/primitive_data/primitives/misc.yaml b/primitive_data/primitives/misc.yaml index 8acf05e8..c9701f6d 100644 --- a/primitive_data/primitives/misc.yaml +++ b/primitive_data/primitives/misc.yaml @@ -112,7 +112,7 @@ definitions: vector_length_agnostic: True implementation: | typename Vec::register_type result{}; - tsl::oneAPIfpga::details::conflictReduce(result, data, std::make_index_sequence{}); + tsl::oneAPIfpgaFun::details::conflictReduce(result, data, std::make_index_sequence{}); result[0] = 0; return result; ... diff --git a/supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp b/supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp index b1eeb175..eea94444 100644 --- a/supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp +++ b/supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp @@ -8,7 +8,7 @@ namespace tsl { - namespace oneAPIfpga { + namespace oneAPIfpgaFun { namespace details { template __attribute__((always_inline)) inline typename Vec::base_type conflictReduceImpl(typename Vec::register_type const & data, std::index_sequence) { From b6504d8559b124db889dbc1a93c0f8b0fb231bc2 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Mon, 17 Jul 2023 11:30:12 +0200 Subject: [PATCH 10/47] Added example to conflictReduceImpl --- supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp b/supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp index eea94444..b26c52a7 100644 --- a/supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp +++ b/supplementary/oneApiFPGA/include/tslOneAPIfpgaHelper.hpp @@ -10,6 +10,15 @@ namespace tsl { namespace oneAPIfpgaFun { namespace details { + /** + result[7] = ((data[7] == data[6]) ? 0b1000000 : 0) | ((data[7] == data[5]) ? 0b100000 : 0) | ((data[7] == data[4]) ? 0b10000 : 0) | ((data[7] == data[3]) ? 0b1000 : 0) | ((data[7] == data[2]) ? 0b100 : 0) | ((data[7] == data[1]) ? 0b10 : 0) | ((data[7] == data[0]) ? 0b1 : 0); + result[6] = ((data[6] == data[5]) ? 0b100000 : 0) | ((data[6] == data[4]) ? 0b10000 : 0) | ((data[6] == data[3]) ? 0b1000 : 0) | ((data[6] == data[2]) ? 0b100 : 0) | ((data[6] == data[1]) ? 0b10 : 0) | ((data[6] == data[0]) ? 0b1 : 0); + result[5] = ((data[5] == data[4]) ? 0b10000 : 0) | ((data[5] == data[3]) ? 0b1000 : 0) | ((data[5] == data[2]) ? 0b100 : 0) | ((data[5] == data[1]) ? 0b10 : 0) | ((data[5] == data[0]) ? 0b1 : 0); + result[4] = ((data[4] == data[3]) ? 0b1000 : 0) | ((data[4] == data[2]) ? 0b100 : 0) | ((data[4] == data[1]) ? 0b10 : 0) | ((data[4] == data[0]) ? 0b1 : 0); + result[3] = ((data[3] == data[2]) ? 0b100 : 0) | ((data[3] == data[1]) ? 0b10 : 0) | ((data[3] == data[0]) ? 0b1 : 0); + result[2] = ((data[2] == data[1]) ? 0b10 : 0) | ((data[2] == data[0]) ? 0b1 : 0); + result[1] = ((data[1] == data[0]) ? 0b1 : 0); + */ template __attribute__((always_inline)) inline typename Vec::base_type conflictReduceImpl(typename Vec::register_type const & data, std::index_sequence) { return ((typename Vec::base_type)0 | ... | (data[Upper+1] == data[Rest] ? ((typename Vec::base_type)1 << (typename Vec::base_type)Rest) : (typename Vec::base_type)0)); From af1d2811adc1ce78f90de492f43fad9f2b16030c Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Mon, 17 Jul 2023 18:03:56 +0200 Subject: [PATCH 11/47] added ivdep to fpga load store --- primitive_data/primitives/ls.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/primitive_data/primitives/ls.yaml b/primitive_data/primitives/ls.yaml index 2839c15e..d20b5821 100644 --- a/primitive_data/primitives/ls.yaml +++ b/primitive_data/primitives/ls.yaml @@ -86,6 +86,7 @@ definitions: T reg; //initialize the result auto mem = assume_aligned(memory); #pragma unroll + [[intel::ivdep(reg)]] for (size_t idx = 0; idx < Vec::vector_element_count(); idx++) { reg[idx] = mem[idx]; } @@ -174,6 +175,7 @@ definitions: using T = typename Vec::register_type; T reg; //initialize the result #pragma unroll + [[intel::ivdep(reg)]] for (size_t idx = 0; idx < Vec::vector_element_count(); idx++) { reg[idx] = memory[idx]; } @@ -260,6 +262,7 @@ definitions: vector_length_agnostic: True implementation: | #pragma unroll + [[intel::ivdep(data)]] for (size_t idx = 0; idx < Vec::vector_element_count(); idx++) { memory[idx] = data[idx]; } @@ -343,6 +346,7 @@ definitions: vector_length_agnostic: True implementation: | #pragma unroll + [[intel::ivdep(data)]] for (size_t idx = 0; idx < Vec::vector_element_count(); idx++) { memory[idx] = data[idx]; } From cb0e2ec452ea5386ffe84d0317b9fb76cdc38057 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Mon, 17 Jul 2023 18:03:56 +0200 Subject: [PATCH 12/47] Revert "added ivdep to fpga load store" This reverts commit af1d2811adc1ce78f90de492f43fad9f2b16030c. --- primitive_data/primitives/ls.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/primitive_data/primitives/ls.yaml b/primitive_data/primitives/ls.yaml index d20b5821..2839c15e 100644 --- a/primitive_data/primitives/ls.yaml +++ b/primitive_data/primitives/ls.yaml @@ -86,7 +86,6 @@ definitions: T reg; //initialize the result auto mem = assume_aligned(memory); #pragma unroll - [[intel::ivdep(reg)]] for (size_t idx = 0; idx < Vec::vector_element_count(); idx++) { reg[idx] = mem[idx]; } @@ -175,7 +174,6 @@ definitions: using T = typename Vec::register_type; T reg; //initialize the result #pragma unroll - [[intel::ivdep(reg)]] for (size_t idx = 0; idx < Vec::vector_element_count(); idx++) { reg[idx] = memory[idx]; } @@ -262,7 +260,6 @@ definitions: vector_length_agnostic: True implementation: | #pragma unroll - [[intel::ivdep(data)]] for (size_t idx = 0; idx < Vec::vector_element_count(); idx++) { memory[idx] = data[idx]; } @@ -346,7 +343,6 @@ definitions: vector_length_agnostic: True implementation: | #pragma unroll - [[intel::ivdep(data)]] for (size_t idx = 0; idx < Vec::vector_element_count(); idx++) { memory[idx] = data[idx]; } From 3be6c91a83377bc829a0d7d5c0e26b515b87e178 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Wed, 19 Jul 2023 20:36:41 +0200 Subject: [PATCH 13/47] moved clz_recursive to ns details --- .../static_files/core/utils/functional.yaml | 57 ++++++++++--------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/generator/static_files/core/utils/functional.yaml b/generator/static_files/core/utils/functional.yaml index 3a5dad3d..482d3793 100644 --- a/generator/static_files/core/utils/functional.yaml +++ b/generator/static_files/core/utils/functional.yaml @@ -21,7 +21,35 @@ implementations: constexpr RetT clz(T data) { return std::countl_zero>(data); } - + } + #else + namespace details { + template + RetT clz(T data) { + static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); + if constexpr(std::is_unsigned_v) { + if constexpr(sizeof(T) <= sizeof(unsigned int)) { + return data == 0 ? std::numeric_limits::digits : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + return data == 0 ? std::numeric_limits::digits : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } + } else { + using UT = std::make_unsigned_t; + if constexpr(sizeof(UT) <= sizeof(unsigned int)) { + return data == 0 ? std::numeric_limits::digits : __builtin_clz(static_cast(data)) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { + return data == 0 ? std::numeric_limits::digits : __builtin_clzl(static_cast(data)) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { + return data == 0 ? std::numeric_limits::digits : __builtin_clzll(static_cast(data)) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } + } + } + } + #endif + - | + namespace details { template struct clz_recursive; template @@ -58,33 +86,6 @@ implementations: } }; } - #else - namespace details { - template - RetT clz(T data) { - static_assert(sizeof(T) <= sizeof(unsigned long long), "Unsupported type"); - if constexpr(std::is_unsigned_v) { - if constexpr(sizeof(T) <= sizeof(unsigned int)) { - return data == 0 ? std::numeric_limits::digits : __builtin_clz(data) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); - } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { - return data == 0 ? std::numeric_limits::digits : __builtin_clzl(data) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { - return data == 0 ? std::numeric_limits::digits : __builtin_clzll(data) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - } - } else { - using UT = std::make_unsigned_t; - if constexpr(sizeof(UT) <= sizeof(unsigned int)) { - return data == 0 ? std::numeric_limits::digits : __builtin_clz(static_cast(data)) - (sizeof(unsigned int) * CHAR_BIT - sizeof(T) * CHAR_BIT); - } else if constexpr(sizeof(T) <= sizeof(unsigned long)) { - return data == 0 ? std::numeric_limits::digits : __builtin_clzl(static_cast(data)) - (sizeof(unsigned long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - } else if constexpr(sizeof(T) <= sizeof(unsigned long long)) { - return data == 0 ? std::numeric_limits::digits : __builtin_clzll(static_cast(data)) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT); - } - } - } - } - - #endif - | namespace reducer { template From d6b9087007de4b8d3b3a4cd3e2d0f2c524c1f86c Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Sat, 22 Jul 2023 20:26:10 +0200 Subject: [PATCH 14/47] fixed modulo for fpga --- primitive_data/primitives/calc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/primitive_data/primitives/calc.yaml b/primitive_data/primitives/calc.yaml index 9efef1c4..64af553c 100644 --- a/primitive_data/primitives/calc.yaml +++ b/primitive_data/primitives/calc.yaml @@ -1234,7 +1234,7 @@ definitions: T result; //initialize the result #pragma unroll for(int i = 0; i < Vec::vector_element_count(); ++i) { - result[i] = vec[i] - ((vec[i] / val[i]) * val[i]); + result[i] = vec[i] - ((vec[i] / val) * val); } return result; ... From a1bf79dad1546a44594c9d1e635c541b43f03536 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Mon, 21 Aug 2023 12:06:10 +0200 Subject: [PATCH 15/47] changed implementation of modulo for fpga --- primitive_data/primitives/calc.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/primitive_data/primitives/calc.yaml b/primitive_data/primitives/calc.yaml index 64af553c..7194e14d 100644 --- a/primitive_data/primitives/calc.yaml +++ b/primitive_data/primitives/calc.yaml @@ -1234,7 +1234,7 @@ definitions: T result; //initialize the result #pragma unroll for(int i = 0; i < Vec::vector_element_count(); ++i) { - result[i] = vec[i] - ((vec[i] / val) * val); + result[i] = vec[i] % val;//vec[i] - ((vec[i] / val) * val); } return result; ... From 42a0e5d7eca47bb8c9d0d10a573787701d94449c Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 22 Aug 2023 10:26:14 +0000 Subject: [PATCH 16/47] removed assumed_aligned from fpga load --- primitive_data/primitives/ls.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/primitive_data/primitives/ls.yaml b/primitive_data/primitives/ls.yaml index 2839c15e..348dd861 100644 --- a/primitive_data/primitives/ls.yaml +++ b/primitive_data/primitives/ls.yaml @@ -84,10 +84,9 @@ definitions: implementation: | using T = typename Vec::register_type; T reg; //initialize the result - auto mem = assume_aligned(memory); #pragma unroll for (size_t idx = 0; idx < Vec::vector_element_count(); idx++) { - reg[idx] = mem[idx]; + reg[idx] = memory[idx]; } return reg; ... From 2490cb14111cc52f5e0d7210dfdca0bdfc353691 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 22 Aug 2023 16:55:40 +0200 Subject: [PATCH 17/47] Added first steps into TSL-runtime --- generator/config/default_conf.yaml | 2 + .../generator/tsl_generator_schema.yaml | 5 + .../expansions/cmake_lib.template | 14 +- generator/core/ctrl/tsl_lib.py | 19 +- generator/core/ctrl/tsl_libfile_generator.py | 7 +- generator/core/model/tsl_file.py | 7 + generator/expansions/tsl_cmake.py | 1 + generator/static_files/core/tsl_static.yaml | 1 + .../static_files/core/utils/runtime.yaml | 41 ++++ main.py | 2 + .../extensions/simd/intel/avx2.yaml | 1 + .../extensions/simd/intel/avx512.yaml | 1 + .../extensions/simd/intel/fpga_generic.yaml | 1 + .../extensions/simd/intel/fpga_native.yaml | 2 +- primitive_data/extensions/simd/intel/sse.yaml | 1 + .../runtime/cpu/include/tslCPUrt.hpp | 77 ++++++++ .../oneApiFPGA/include/tslOneAPIrt.hpp | 177 ++++++++++++++++++ 17 files changed, 346 insertions(+), 13 deletions(-) create mode 100644 generator/static_files/core/utils/runtime.yaml mode change 100644 => 100755 main.py create mode 100644 supplementary/runtime/cpu/include/tslCPUrt.hpp create mode 100644 supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp diff --git a/generator/config/default_conf.yaml b/generator/config/default_conf.yaml index ceed1efa..f1df6cc2 100644 --- a/generator/config/default_conf.yaml +++ b/generator/config/default_conf.yaml @@ -52,4 +52,6 @@ configuration_files: lib: "core" supplementary: root_path: "supplementary" + runtime: + root_path: "runtime" ... diff --git a/generator/config/generator/tsl_generator_schema.yaml b/generator/config/generator/tsl_generator_schema.yaml index 3336eaf8..5d20406c 100644 --- a/generator/config/generator/tsl_generator_schema.yaml +++ b/generator/config/generator/tsl_generator_schema.yaml @@ -106,6 +106,11 @@ extension: example: "{sse4_1: 'msse4.1', sse4_2: 'msse4.2'}" default: "{}" includes: *includes + runtime_headers: + type: "list" + entry_type: "str" + default: [] + brief: "List of headers that are associated with the runtime (starting from ./supplementary/runtime)" required_supplementary_libraries: type: "list" brief: "List of libraries which are required for this extension." diff --git a/generator/config/generator/tsl_templates/expansions/cmake_lib.template b/generator/config/generator/tsl_templates/expansions/cmake_lib.template index f39850a5..73202a93 100644 --- a/generator/config/generator/tsl_templates/expansions/cmake_lib.template +++ b/generator/config/generator/tsl_templates/expansions/cmake_lib.template @@ -33,14 +33,12 @@ add_subdirectory({{ dir }}) {% for supplementary_lib in tsl_required_supplementary_libraries %} add_subdirectory({{ supplementary_lib["cmakelists_path"] }}) {% endfor %} -# Add includes if present -{% for supplementary_lib in tsl_required_supplementary_libraries %} -{% if "include_path" in supplementary_lib %} -#target_include_directories({{ tsl_lib_name}} INTERFACE {{ supplementary_lib["include_path"] }}/) -target_include_directories({{ tsl_lib_name }} INTERFACE $ $ $ List[Dict[str, str]]: self.log(logging.WARNING, f"Supplementary library {entry_dict['name']} already added. Ignoring.") return result + @property + def relevant_runtime_headers(self) -> List[Path]: + result_set: Set[str] = set() + supplementary_root_path = Path(config.get_configuration_files_entry("supplementary")["root_path"]) + runtime_root_path = supplementary_root_path.joinpath(config.get_configuration_files_entry("supplementary")["runtime"]["root_path"]) + for extension in self.extension_set: + if "runtime_headers" in extension.data: + result_set.update([f"{runtime_root_path.joinpath(p)}" for p in extension.data["runtime_headers"]]) + return [Path(p) for p in result_set] + def copy_relevant_supplementary_files(self) -> None: supplementary_root_path = Path(config.generation_out_path) for libData in self.relevant_supplementary_libraries: shutil.rmtree(supplementary_root_path.joinpath(libData['cmakelists_path']).resolve(), ignore_errors=True) shutil.copytree(Path(libData['cmakelists_path']).resolve(), supplementary_root_path.joinpath(libData['cmakelists_path']).resolve()) - + to_copy = self.relevant_runtime_headers + #create all directories recursively + for fpath in to_copy: + runtime_dir = supplementary_root_path.joinpath(fpath).resolve().parent + runtime_dir.mkdir(parents=True, exist_ok=True) + #copy all files from to_copy to supplementary_root_path, ignoring whether they already exist and keeping the directory structure + for fpath in to_copy: + shutil.copy(fpath.resolve(), supplementary_root_path.joinpath(fpath).resolve(), follow_symlinks=True) @LogInit() def __init__(self, extension_set: TSLExtensionSet, primitive_class_set: TSLPrimitiveClassSet) -> None: diff --git a/generator/core/ctrl/tsl_libfile_generator.py b/generator/core/ctrl/tsl_libfile_generator.py index ce210118..f7448129 100644 --- a/generator/core/ctrl/tsl_libfile_generator.py +++ b/generator/core/ctrl/tsl_libfile_generator.py @@ -129,7 +129,7 @@ def __create_primitive_header_files(self, extension_set: TSLExtensionSet, self.__primitive_class_declarations.append(declaration_file) self.__primitive_class_definitions.extend(definition_files_per_extension_dict.values()) - def __create_static_header_files(self) -> None: + def __create_static_header_files(self, lib: TSLLib) -> None: self.log(logging.INFO, f"Starting generation of static header.") for static_yaml_file_path in config.static_lib_files(): if static_yaml_file_path.stem == config.lib_root_header.stem: @@ -159,7 +159,7 @@ def __init__(self, lib: TSLLib) -> None: self.__create_extension_header_files(lib.extension_set) self.__create_primitive_header_files(lib.extension_set, lib.primitive_class_set) - self.__create_static_header_files() + self.__create_static_header_files(lib) generated_files_root: TSLHeaderFile = TSLHeaderFile.create_from_dict(config.lib_generated_files_root_header, {}) for extension_file in self.extension_files: @@ -168,5 +168,6 @@ def __init__(self, lib: TSLLib) -> None: generated_files_root.add_file_include(primitive_declaration) for primitive_definition in self.primitive_definition_files: generated_files_root.add_file_include(primitive_definition) - + for runtime_header in lib.relevant_runtime_headers: + generated_files_root.add_predefined_tsl_file_include(f'"{runtime_header.name}"') self.__static_files.append(generated_files_root) diff --git a/generator/core/model/tsl_file.py b/generator/core/model/tsl_file.py index c0e70f46..82a76c9e 100644 --- a/generator/core/model/tsl_file.py +++ b/generator/core/model/tsl_file.py @@ -21,6 +21,8 @@ class TSLHeaderFile: def __init__(self, filename: Path, data_dict: YamlDataType) -> None: self.__filename = filename self.__data_dict = copy.deepcopy(data_dict) + if "tsl_predefined_file_includes" not in self.__data_dict: + self.__data_dict["tsl_predefined_file_includes"] = [] @property def data(self) -> YamlDataType: @@ -46,6 +48,10 @@ def __hash__(self): def add_file_include(self, header_file: TSLHeaderFile) -> None: if header_file not in self.__data_dict["tsl_file_includes"]: self.__data_dict["tsl_file_includes"].append(header_file) + + def add_predefined_tsl_file_include(self, header_file_str: str) -> None: + if header_file_str not in self.__data_dict["tsl_predefined_file_includes"]: + self.__data_dict["tsl_predefined_file_includes"].append(header_file_str) def add_include(self, include: str) -> None: if include not in self.__data_dict["includes"]: @@ -70,6 +76,7 @@ def render(self) -> str: current_path: Path = self.file_name.parent tsl_file_includes = [f"\"{Path(os.path.relpath(Path(included_file.file_name), current_path))}\"" for included_file in self.__data_dict["tsl_file_includes"]] self.__data_dict["includes"].extend([tsl_include for tsl_include in tsl_file_includes if tsl_include not in self.__data_dict["includes"]]) + self.__data_dict["includes"].extend(self.__data_dict["tsl_predefined_file_includes"]) return config.get_template("core::header_file").render(self.__data_dict) def render_to_file(self) -> None: diff --git a/generator/expansions/tsl_cmake.py b/generator/expansions/tsl_cmake.py index 2daa6399..ec3065ce 100644 --- a/generator/expansions/tsl_cmake.py +++ b/generator/expansions/tsl_cmake.py @@ -46,6 +46,7 @@ def get_warning_options() -> str: "library_root_path": f"{strip_common_path_prefix(config.lib_root_path, config.generation_out_path)}/", "tsl_target_compile_options": f"{get_architecture_flags(lib)} {get_warning_options()} -flax-vector-conversions", "tsl_required_supplementary_libraries": lib.relevant_supplementary_libraries, + "tsl_additional_include_paths": [f"{p.parent}" for p in lib.relevant_runtime_headers], "use_concepts": config.use_concepts, "subdirectories": [strip_common_path_prefix(path, config.generation_out_path) for path, translation_units in translation_units.translation_units] } diff --git a/generator/static_files/core/tsl_static.yaml b/generator/static_files/core/tsl_static.yaml index 44e2f943..9a75ece6 100755 --- a/generator/static_files/core/tsl_static.yaml +++ b/generator/static_files/core/tsl_static.yaml @@ -8,4 +8,5 @@ includes: - '"simd/simd_type_concepts.hpp"' - '"simd/simd_type.hpp"' - '"simd/simd_primitive_concepts.hpp"' + - '"utils/runtime.hpp"' ... diff --git a/generator/static_files/core/utils/runtime.yaml b/generator/static_files/core/utils/runtime.yaml new file mode 100644 index 00000000..ce6501a0 --- /dev/null +++ b/generator/static_files/core/utils/runtime.yaml @@ -0,0 +1,41 @@ +--- +file_description: "Static header that defines the runtime." +includes: + - "" + - "" +implementations: + - | + template + class executor { + private: + ExecTarget target; + public: + template + executor(Args&&... args) + : target{std::forward(args)...} + {} + public: + + template + auto allocate(Args&& ... args) { + return target.template allocate(std::forward(args)...); + } + template + void deallocate(T ptr) { + return target.deallocate(ptr); + } + template + void copy(OutT out, InT in, size_t element_count) { + target.copy(out, in, element_count); + } + public: + template class Fun, typename... Args> + decltype(auto) submit(Args... args) { + return target.template submit(args...); + } + template class Fun, typename... Args> + decltype(auto) submit(Args... args) { + return target.template submit(args...); + } + }; +... \ No newline at end of file diff --git a/main.py b/main.py old mode 100644 new mode 100755 index 43cb5ed4..93e3d1c5 --- a/main.py +++ b/main.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python import time from generator.core.tsl_config import config, parse_args @@ -60,6 +61,7 @@ def tsl_setup(file_config, additional_config=None) -> None: sys.stdout.flush() exit(0) else: + print(f"Generating for {args_dict['targets']}") gen.generate(args_dict["targets"]) print("Generation needed %.2f seconds." % (time.time() - st)) diff --git a/primitive_data/extensions/simd/intel/avx2.yaml b/primitive_data/extensions/simd/intel/avx2.yaml index 0086d8c3..9e2bc65b 100644 --- a/primitive_data/extensions/simd/intel/avx2.yaml +++ b/primitive_data/extensions/simd/intel/avx2.yaml @@ -22,4 +22,5 @@ simdT_integral_mask_type: |- ) intrin_tp: {uint8_t: ["epu", 8], uint16_t: ["epu", 16], uint32_t: ["epu", 32], uint64_t: ["epu", 64], int8_t: ["epi", 8], int16_t: ["epi", 16], int32_t: ["epi", 32], int64_t: ["epi", 64], float: ["p", "s"], double: ["p", "d"]} intrin_tp_full: {uint8_t: "epu8", uint16_t: "epu16", uint32_t: "epu32", uint64_t: "epi64", int8_t: "epi8", int16_t: "epi16", int32_t: "epi32", int64_t: "epi64", float: "ps", double: "pd"} +runtime_headers: ["cpu/include/tslCPUrt.hpp"] ... diff --git a/primitive_data/extensions/simd/intel/avx512.yaml b/primitive_data/extensions/simd/intel/avx512.yaml index ffb54ee1..b5e71e8b 100644 --- a/primitive_data/extensions/simd/intel/avx512.yaml +++ b/primitive_data/extensions/simd/intel/avx512.yaml @@ -45,4 +45,5 @@ simdT_mask_type: |- ) intrin_tp: {uint8_t: ["epu", 8], uint16_t: ["epu", 16], uint32_t: ["epu", 32], uint64_t: ["epu", 64], int8_t: ["epi", 8], int16_t: ["epi", 16], int32_t: ["epi", 32], int64_t: ["epi", 64], float: ["p", "s"], double: ["p", "d"]} intrin_tp_full: {uint8_t: "epu8", uint16_t: "epu16", uint32_t: "epu32", uint64_t: "epu64", int8_t: "epi8", int16_t: "epi16", int32_t: "epi32", int64_t: "epi64", float: "ps", double: "pd"} +runtime_headers: ["cpu/include/tslCPUrt.hpp"] ... diff --git a/primitive_data/extensions/simd/intel/fpga_generic.yaml b/primitive_data/extensions/simd/intel/fpga_generic.yaml index fe9880bf..8bd30f27 100644 --- a/primitive_data/extensions/simd/intel/fpga_generic.yaml +++ b/primitive_data/extensions/simd/intel/fpga_generic.yaml @@ -12,5 +12,6 @@ simdT_mask_type_compiler_attributes: "__attribute__((register))" simdT_register_type: "std::array" #this can also be a custom structure or a pointer, but I would *HIGHLY* recommend, using an array simdT_mask_type: "std::array" #we can define the mask type as a register type following the general pattern of SSE and AVX is_generic: True +runtime_headers: ["oneApiFPGA/include/tslOneAPIrt.hpp"] #simdT_mask_type: "std::bitset" #the mask-type should contain a bit for every entry within the register_type. maybe we can substitute the bitset with an integral value at some point ... diff --git a/primitive_data/extensions/simd/intel/fpga_native.yaml b/primitive_data/extensions/simd/intel/fpga_native.yaml index 05752694..90e2794f 100644 --- a/primitive_data/extensions/simd/intel/fpga_native.yaml +++ b/primitive_data/extensions/simd/intel/fpga_native.yaml @@ -18,6 +18,6 @@ required_supplementary_libraries: cmakelists_path: "oneApiFPGA" library_create_function: "create_one_api_fpga_library" include_path: "oneApiFPGA/include" - +runtime_headers: ["oneApiFPGA/include/tslOneAPIrt.hpp"] #simdT_mask_type: "std::bitset" #the mask-type should contain a bit for every entry within the register_type. maybe we can substitute the bitset with an integral value at some point ... diff --git a/primitive_data/extensions/simd/intel/sse.yaml b/primitive_data/extensions/simd/intel/sse.yaml index b9808856..5f26620c 100644 --- a/primitive_data/extensions/simd/intel/sse.yaml +++ b/primitive_data/extensions/simd/intel/sse.yaml @@ -19,4 +19,5 @@ simdT_integral_mask_type: |- ) intrin_tp: {uint8_t: ["epu", 8], uint16_t: ["epu", 16], uint32_t: ["epu", 32], uint64_t: ["epu", 64], int8_t: ["epi", 8], int16_t: ["epi", 16], int32_t: ["epi", 32], int64_t: ["epi", 64], float: ["p", "s"], double: ["p", "d"]} intrin_tp_full: {uint8_t: "epu8", uint16_t: "epu16", uint32_t: "epu32", uint64_t: "epi64", int8_t: "epi8", int16_t: "epi16", int32_t: "epi32", int64_t: "epi64", float: "ps", double: "pd"} +runtime_headers: ["cpu/include/tslCPUrt.hpp"] ... diff --git a/supplementary/runtime/cpu/include/tslCPUrt.hpp b/supplementary/runtime/cpu/include/tslCPUrt.hpp new file mode 100644 index 00000000..cc6605e6 --- /dev/null +++ b/supplementary/runtime/cpu/include/tslCPUrt.hpp @@ -0,0 +1,77 @@ +#ifndef TSL_SUPPLEMENTARY_RUNTIME_CPU_TSLCPURT_HPP +#define TSL_SUPPLEMENTARY_RUNTIME_CPU_TSLCPURT_HPP +#include +#include +#include +#include + + +namespace tsl { + namespace runtime { + class cpu { + public: + cpu() = default; + public: + template + auto allocate(size_t element_count, size_t alignment = 0) { + T * buffer; + if (alignment == 0) { + if ((buffer = reinterpret_cast(malloc(element_count*sizeof(T)))) == nullptr) { + std::cerr << "ERROR: could not allocate space on host" << std::endl; + std::terminate(); + } + } else { + if ((buffer = reinterpret_cast(std::aligned_alloc(alignment, element_count*sizeof(T)))) == nullptr) { + std::cerr << "ERROR: could not allocate space on host" << std::endl; + std::terminate(); + } + } + return buffer; + } + template + void deallocate(T ptr) { + if constexpr(std::is_pointer_v>) { + free(ptr); + } else { + std::cerr << "Can only free a pointer." << std::endl; + std::terminate(); + } + + } + template + void copy(OutT out, InT in, size_t element_count) { + if constexpr( + std::is_pointer_v> && + std::is_pointer_v> + ) { + std::memcpy(out, in, element_count*sizeof(InT)); + } else { + for (size_t i = 0; i < element_count; ++i) { + out[i] = in[i]; + } + } + } + public: + template class Fun, typename... Args> + decltype(auto) submit(Args... args) { + return Fun::apply(args...); + } + template class Fun, typename... Args> + decltype(auto) submit(Args... args) { + if constexpr(VectorLength == 1) { + return Fun, Args...>::apply(args...); + } else if constexpr(sizeof(BaseT)*8*VectorLength == 128) { + return Fun, Args...>::apply(args...); + } else if constexpr(sizeof(BaseT)*8*VectorLength == 256) { + return Fun, Args...>::apply(args...); + } else if constexpr(sizeof(BaseT)*8*VectorLength == 512) { + return Fun, Args...>::apply(args...); + } else { + std::cerr << "ERROR: unsupported vector length" << std::endl; + std::terminate(); + } + } + }; + } +} +#endif \ No newline at end of file diff --git a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp new file mode 100644 index 00000000..0b16d47a --- /dev/null +++ b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp @@ -0,0 +1,177 @@ +#ifndef TSL_SUPPLEMENTARY_RUNTIME_ONEAPIFPGA_TSLONEAPIRT_HPP +#define TSL_SUPPLEMENTARY_RUNTIME_ONEAPIFPGA_TSLONEAPIRT_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace tsl { + namespace oneAPI { + struct MEMORY_ON_HOST{}; + struct MEMORY_ON_DEVICE{}; + } + namespace runtime { + + class oneAPI_helper { + protected: + static void exception_handler(sycl::exception_list exceptions) { + for (std::exception_ptr const& e : exceptions) { + try { + std::rethrow_exception(e); + } catch (sycl::exception const& e) { + std::cerr << "Caught asynchronous SYCL exception:\n" + << e.what() << std::endl; + std::terminate(); + } + } + } + }; + struct oneAPI_emulator_selector: public oneAPI_helper { + #ifdef SYCL_SELECTOR_CLASS_DEPRECATED + sycl::queue q; + oneAPI_emulator_selector(auto&& one_api_queue_properties) + : q{sycl::ext::intel::fpga_emulator_selector_v, oneAPI_helper::exception_handler, one_api_queue_properties} { + std::cout << "Using FPGA Emulator with fpga_emulator_selector_v" << std::endl; + } + #else + sycl::ext::intel::fpga_emulator_selector selector; + sycl::queue q; + oneAPI_emulator_selector(auto&& one_api_queue_properties) + : selector{}, + q{selector, oneAPI_helper::exception_handler, one_api_queue_properties} { + std::cout << "Using FPGA Emulator with fpga_emulator_selector" << std::endl; + } + #endif + }; + struct oneAPI_hardware_selector: public oneAPI_helper { + #ifdef SYCL_SELECTOR_CLASS_DEPRECATED + sycl::queue q; + oneAPI_hardware_selector(auto&& one_api_queue_properties) + : q{sycl::ext::intel::fpga_selector_v, oneAPI_helper::exception_handler, one_api_queue_properties} { + std::cout << "Using FPGA Hardware with fpga_selector_v" << std::endl; + } + #else + sycl::ext::intel::fpga_selector selector; + sycl::queue q; + oneAPI_hardware_selector(auto&& one_api_queue_properties) + : selector{}, + q{selector, oneAPI_helper::exception_handler, one_api_queue_properties} { + std::cout << "Using FPGA Hardware with fpga_selector" << std::endl; + } + #endif + }; + + template + class oneAPI_fpga { + private: + Selector selector; + sycl::queue& q; + public: + oneAPI_fpga( + auto&& one_api_queue_properties + ): selector{one_api_queue_properties}, + q{selector.q} + { + // make sure the device supports USM device allocations + sycl::device d = q.get_device(); + if (!d.get_info()) { + std::cerr << "ERROR: The selected device does not support USM device" + << " allocations" << std::endl; + std::terminate(); + } + if (!d.get_info()) { + std::cerr << "ERROR: The selected device does not support USM host" + << " allocations" << std::endl; + std::terminate(); + } + } + public: + template + auto allocate(size_t element_count, ::tsl::oneAPI::MEMORY_ON_HOST, size_t alignment = 0) { + using T = std::remove_pointer_t>; + T * buffer; + if (alignment == 0) { + if ((buffer = sycl::malloc_host(element_count*sizeof(T), q)) == nullptr) { + std::cerr << "ERROR: could not allocate space on host" << std::endl; + std::terminate(); + } + } else { + if ((buffer = sycl::aligned_alloc_host(alignment, element_count*sizeof(T), q)) == nullptr) { + std::cerr << "ERROR: could not allocate space on host" << std::endl; + std::terminate(); + } + } + return sycl::host_ptr{buffer}; + } + template + auto allocate(size_t element_count, ::tsl::oneAPI::MEMORY_ON_DEVICE, size_t alignment = 0) { + using T = std::remove_pointer_t>; + T * buffer; + if (alignment == 0) { + if ((buffer = sycl::malloc_device(element_count*sizeof(T), q)) == nullptr) { + std::cerr << "ERROR: could not allocate space on host" << std::endl; + std::terminate(); + } + } else { + if ((buffer = sycl::aligned_alloc_device(alignment, element_count*sizeof(T), q)) == nullptr) { + std::cerr << "ERROR: could not allocate space on host" << std::endl; + std::terminate(); + } + } + return sycl::device_ptr{buffer}; + } + template + void deallocate(T ptr) { + if constexpr(std::is_pointer_v>) { + sycl::free(ptr, q); + } else { + sycl::free(ptr.get(), q); + } + } + template + void copy(OutT out, InT in, size_t element_count) { + if constexpr( + std::is_pointer_v> && + std::is_pointer_v> + ) { + q.memcpy(out, in, element_count * sizeof(InT)); + } else { + q.memcpy(out.get(), in.get(), element_count * sizeof(InT)); + } + q.wait(); + } + public: + template class Fun, typename... Args> + decltype(auto) submit(Args... args) { + // Check that all elements of type Args are of type oneAPI_ptr_wrapper + // static_assert((std::is_same_v> && ...), "All arguments must be of type oneAPI_ptr_wrapper"); + return q.submit( + [&](sycl::handler& h) { + h.single_task>([=]() [[intel::kernel_args_restrict]] { + return Fun::apply(args...); + }); + } + ).wait(); + } + template class Fun, typename... Args> + decltype(auto) submit(Args... args) { + using FunctorClass = Fun, Args...>; + // Check that all elements of type Args are of type oneAPI_ptr_wrapper + // static_assert((std::is_same_v> && ...), "All arguments must be of type oneAPI_ptr_wrapper"); + return q.submit( + [&](sycl::handler& h) { + h.single_task([=]() [[intel::kernel_args_restrict]] { + return FunctorClass::apply(args...); + }); + } + ).wait(); + } + }; + } +} + +#endif \ No newline at end of file From 18a15123c53dc675761dcb94d528b0883395988c Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 22 Aug 2023 17:50:46 +0200 Subject: [PATCH 18/47] Changed CPU Runtime for Intel --- generator/core/ctrl/tsl_lib.py | 38 ++++++++++++++++--- generator/core/ctrl/tsl_libfile_generator.py | 4 +- .../runtime/cpu/include/tslCPUrt.hpp | 16 ++++---- 3 files changed, 43 insertions(+), 15 deletions(-) diff --git a/generator/core/ctrl/tsl_lib.py b/generator/core/ctrl/tsl_lib.py index 35d2108a..90e1c1b8 100644 --- a/generator/core/ctrl/tsl_lib.py +++ b/generator/core/ctrl/tsl_lib.py @@ -61,19 +61,47 @@ def relevant_runtime_headers(self) -> List[Path]: result_set.update([f"{runtime_root_path.joinpath(p)}" for p in extension.data["runtime_headers"]]) return [Path(p) for p in result_set] + @property + def runtime_headers_with_extension_dict(self) -> Dict[str, List[dict]]: + result: Dict[str, List[dict]] = dict() + + supplementary_root_path = Path(config.get_configuration_files_entry("supplementary")["root_path"]) + runtime_root_path = supplementary_root_path.joinpath(config.get_configuration_files_entry("supplementary")["runtime"]["root_path"]) + for extension in self.extension_set: + if "runtime_headers" in extension.data: + for header in extension.data["runtime_headers"]: + headerFile = f"{runtime_root_path.joinpath(header)}" + if headerFile not in result: + result[headerFile] = list() + result[headerFile].append(extension.data) + return result + def copy_relevant_supplementary_files(self) -> None: supplementary_root_path = Path(config.generation_out_path) for libData in self.relevant_supplementary_libraries: shutil.rmtree(supplementary_root_path.joinpath(libData['cmakelists_path']).resolve(), ignore_errors=True) shutil.copytree(Path(libData['cmakelists_path']).resolve(), supplementary_root_path.joinpath(libData['cmakelists_path']).resolve()) - to_copy = self.relevant_runtime_headers + runtime_headers_dict = self.runtime_headers_with_extension_dict #create all directories recursively - for fpath in to_copy: + for fpath in runtime_headers_dict: runtime_dir = supplementary_root_path.joinpath(fpath).resolve().parent runtime_dir.mkdir(parents=True, exist_ok=True) - #copy all files from to_copy to supplementary_root_path, ignoring whether they already exist and keeping the directory structure - for fpath in to_copy: - shutil.copy(fpath.resolve(), supplementary_root_path.joinpath(fpath).resolve(), follow_symlinks=True) + for fpath in runtime_headers_dict: + associated_extensions = runtime_headers_dict[fpath] + runtime_relevant_data_dict = { + "avail_extension_types_dict": { + extension_data["simdT_default_size_in_bits"]: extension_data["simdT_name"] for extension_data in associated_extensions + } + } + #load file into string + print(f"Reading from {Path(fpath).resolve()}") + print(f"Writing to {supplementary_root_path.joinpath(fpath).resolve()}") + with open(Path(fpath).resolve(), 'r') as runtime_header_input, open(supplementary_root_path.joinpath(fpath).resolve(), 'w') as runtime_header_output: + file_content = runtime_header_input.read() + #create a jinja template from the file + template = config.create_template(file_content) + runtime_header_output.write(template.render(runtime_relevant_data_dict)) + # shutil.copy(fpath.resolve(), supplementary_root_path.joinpath(fpath).resolve(), follow_symlinks=True) @LogInit() def __init__(self, extension_set: TSLExtensionSet, primitive_class_set: TSLPrimitiveClassSet) -> None: diff --git a/generator/core/ctrl/tsl_libfile_generator.py b/generator/core/ctrl/tsl_libfile_generator.py index f7448129..be49bfca 100644 --- a/generator/core/ctrl/tsl_libfile_generator.py +++ b/generator/core/ctrl/tsl_libfile_generator.py @@ -129,7 +129,7 @@ def __create_primitive_header_files(self, extension_set: TSLExtensionSet, self.__primitive_class_declarations.append(declaration_file) self.__primitive_class_definitions.extend(definition_files_per_extension_dict.values()) - def __create_static_header_files(self, lib: TSLLib) -> None: + def __create_static_header_files(self) -> None: self.log(logging.INFO, f"Starting generation of static header.") for static_yaml_file_path in config.static_lib_files(): if static_yaml_file_path.stem == config.lib_root_header.stem: @@ -159,7 +159,7 @@ def __init__(self, lib: TSLLib) -> None: self.__create_extension_header_files(lib.extension_set) self.__create_primitive_header_files(lib.extension_set, lib.primitive_class_set) - self.__create_static_header_files(lib) + self.__create_static_header_files() generated_files_root: TSLHeaderFile = TSLHeaderFile.create_from_dict(config.lib_generated_files_root_header, {}) for extension_file in self.extension_files: diff --git a/supplementary/runtime/cpu/include/tslCPUrt.hpp b/supplementary/runtime/cpu/include/tslCPUrt.hpp index cc6605e6..6a4cb18d 100644 --- a/supplementary/runtime/cpu/include/tslCPUrt.hpp +++ b/supplementary/runtime/cpu/include/tslCPUrt.hpp @@ -4,7 +4,7 @@ #include #include #include - +#include namespace tsl { namespace runtime { @@ -60,13 +60,13 @@ namespace tsl { decltype(auto) submit(Args... args) { if constexpr(VectorLength == 1) { return Fun, Args...>::apply(args...); - } else if constexpr(sizeof(BaseT)*8*VectorLength == 128) { - return Fun, Args...>::apply(args...); - } else if constexpr(sizeof(BaseT)*8*VectorLength == 256) { - return Fun, Args...>::apply(args...); - } else if constexpr(sizeof(BaseT)*8*VectorLength == 512) { - return Fun, Args...>::apply(args...); - } else { + } + {% for avail_extension_type_size in avail_extension_types_dict %} + else if constexpr(sizeof(BaseT)*CHAR_BIT*VectorLength == {{ avail_extension_type_size }}) { + return Fun, Args...>::apply(args...); + } + {% endfor %} + else { std::cerr << "ERROR: unsupported vector length" << std::endl; std::terminate(); } From efd9eb6218e131b49b943056d6273eade2d1d6da Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Wed, 23 Aug 2023 08:29:46 +0200 Subject: [PATCH 19/47] Added default executor for oneapi FPGA --- supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp index 0b16d47a..710ca307 100644 --- a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp +++ b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp @@ -171,6 +171,11 @@ namespace tsl { ).wait(); } }; + #ifdef ONEAPI_FPGA_HARDWARE + using oneAPI_default_fpga = oneAPI_fpga; + #else + using oneAPI_default_fpga = oneAPI_fpga; + #endif } } From 2ef4d1b9c209e2116a19afb817c00ba4ca77faa8 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Wed, 23 Aug 2023 12:37:06 +0200 Subject: [PATCH 20/47] Extended FPGA copy --- supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp index 710ca307..bf2a7f23 100644 --- a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp +++ b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -139,6 +140,10 @@ namespace tsl { std::is_pointer_v> ) { q.memcpy(out, in, element_count * sizeof(InT)); + } else if constexpr(std::is_pointer_v>){ + q.memcpy(out, in.get(), element_count * sizeof(InT)); + } else if constexpr(std::is_pointer_v>) { + q.memcpy(out.get(), in, element_count * sizeof(InT)); } else { q.memcpy(out.get(), in.get(), element_count * sizeof(InT)); } @@ -159,7 +164,7 @@ namespace tsl { } template class Fun, typename... Args> decltype(auto) submit(Args... args) { - using FunctorClass = Fun, Args...>; + using FunctorClass = Fun, Args...>; // Check that all elements of type Args are of type oneAPI_ptr_wrapper // static_assert((std::is_same_v> && ...), "All arguments must be of type oneAPI_ptr_wrapper"); return q.submit( From d496b8a1d58785b5c6721be059846d8e738041e5 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Wed, 23 Aug 2023 15:06:21 +0200 Subject: [PATCH 21/47] Added helper function to cope with oneApi refactored typedefs --- .../runtime/oneApiFPGA/include/tslOneAPIrt.hpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp index bf2a7f23..b6afb7c1 100644 --- a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp +++ b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp @@ -14,6 +14,24 @@ namespace tsl { namespace oneAPI { struct MEMORY_ON_HOST{}; struct MEMORY_ON_DEVICE{}; + + namespace details { + // as the current version of multi_ptr exposes the underlying element type with using "value_type" but older versions call it "element_type", we need this little helper function + template + struct has_typedef_element_type : std::false_type {}; + template + struct has_typedef_element_type> : std::true_type {}; + template + struct has_typedef_value_type : std::false_type {}; + template + struct has_typedef_value_type> : std::true_type {}; + } + template + using multi_ptr_base_type = std::conditional_t< + details::has_typedef_element_type::value, + typename MultiPtrClass::element_type, + typename MultiPtrClass::value_type + >; } namespace runtime { From 58348e84dced17cd62b6853178ace36b756fd829 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Wed, 23 Aug 2023 15:21:25 +0200 Subject: [PATCH 22/47] Changed the type helper for multi_ptr --- .../oneApiFPGA/include/tslOneAPIrt.hpp | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp index b6afb7c1..ee5571ce 100644 --- a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp +++ b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp @@ -15,23 +15,18 @@ namespace tsl { struct MEMORY_ON_HOST{}; struct MEMORY_ON_DEVICE{}; - namespace details { - // as the current version of multi_ptr exposes the underlying element type with using "value_type" but older versions call it "element_type", we need this little helper function + namespace details { template - struct has_typedef_element_type : std::false_type {}; + struct multi_ptr_base_type { + using type = typename T::value_type; + }; template - struct has_typedef_element_type> : std::true_type {}; - template - struct has_typedef_value_type : std::false_type {}; - template - struct has_typedef_value_type> : std::true_type {}; + struct multi_ptr_base_type> { + using type = typename T::element_type; + }; } template - using multi_ptr_base_type = std::conditional_t< - details::has_typedef_element_type::value, - typename MultiPtrClass::element_type, - typename MultiPtrClass::value_type - >; + using multi_ptr_base_type = typename details::multi_ptr_base_type::type; } namespace runtime { From fee6e85b098e9e2bd771fa835ad4e206c3f6f24b Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Thu, 24 Aug 2023 10:24:35 +0200 Subject: [PATCH 23/47] FIxed bug in FPGA-runtime copy --- .../oneApiFPGA/include/tslOneAPIrt.hpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp index ee5571ce..0d5a350f 100644 --- a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp +++ b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp @@ -147,26 +147,28 @@ namespace tsl { } } template - void copy(OutT out, InT in, size_t element_count) { - if constexpr( + void copy(OutT out, InT in, size_t element_count) { + if constexpr( //If pointers are passed to copy, we could use them directly, since we assume them to be raw pointers. This is quite shaky, but it works for now. std::is_pointer_v> && std::is_pointer_v> ) { - q.memcpy(out, in, element_count * sizeof(InT)); + using InBaseT = typename std::remove_pointer_t>; + q.memcpy(out, in, element_count * sizeof(InBaseT)); } else if constexpr(std::is_pointer_v>){ - q.memcpy(out, in.get(), element_count * sizeof(InT)); + using InBaseT = typename oneAPI::multi_ptr_base_type; + q.memcpy(out, in.get(), element_count * sizeof(InBaseT)); } else if constexpr(std::is_pointer_v>) { - q.memcpy(out.get(), in, element_count * sizeof(InT)); + using InBaseT = typename std::remove_pointer_t>; + q.memcpy(out.get(), in, element_count * sizeof(InBaseT)); } else { - q.memcpy(out.get(), in.get(), element_count * sizeof(InT)); + using InBaseT = typename oneAPI::multi_ptr_base_type; + q.memcpy(out.get(), in.get(), element_count * sizeof(InBaseT)); } q.wait(); } public: template class Fun, typename... Args> decltype(auto) submit(Args... args) { - // Check that all elements of type Args are of type oneAPI_ptr_wrapper - // static_assert((std::is_same_v> && ...), "All arguments must be of type oneAPI_ptr_wrapper"); return q.submit( [&](sycl::handler& h) { h.single_task>([=]() [[intel::kernel_args_restrict]] { @@ -178,8 +180,6 @@ namespace tsl { template class Fun, typename... Args> decltype(auto) submit(Args... args) { using FunctorClass = Fun, Args...>; - // Check that all elements of type Args are of type oneAPI_ptr_wrapper - // static_assert((std::is_same_v> && ...), "All arguments must be of type oneAPI_ptr_wrapper"); return q.submit( [&](sycl::handler& h) { h.single_task([=]() [[intel::kernel_args_restrict]] { From f39595804d66036e2f320121326fb8a4674f7caf Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Fri, 25 Aug 2023 13:44:59 +0200 Subject: [PATCH 24/47] added helper type --- supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp index 0d5a350f..9589f3ab 100644 --- a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp +++ b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp @@ -27,6 +27,14 @@ namespace tsl { } template using multi_ptr_base_type = typename details::multi_ptr_base_type::type; + + template + using memory_base_type = + std::conditional_t< + std::is_pointer_v>, + std::remove_pointer_t>, + typename multi_ptr_base_type> + >; } namespace runtime { From 4051f66e533cace47bdfdb9a26c36d9c662eeacd Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Fri, 25 Aug 2023 14:07:22 +0200 Subject: [PATCH 25/47] Fixed a bug --- supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp index 9589f3ab..0699aa20 100644 --- a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp +++ b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp @@ -33,7 +33,7 @@ namespace tsl { std::conditional_t< std::is_pointer_v>, std::remove_pointer_t>, - typename multi_ptr_base_type> + multi_ptr_base_type> >; } namespace runtime { From 1ef0bcfec002a28c6e5c5a9b4487221b4926388c Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 29 Aug 2023 10:09:00 +0200 Subject: [PATCH 26/47] Added fpga_loop_fuse to fpga executor --- supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp index 0699aa20..52b76719 100644 --- a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp +++ b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp @@ -180,7 +180,9 @@ namespace tsl { return q.submit( [&](sycl::handler& h) { h.single_task>([=]() [[intel::kernel_args_restrict]] { - return Fun::apply(args...); + sycl::ext::intel::fpga_loop_fuse([&] { + return Fun::apply(args...); + }) }); } ).wait(); @@ -191,7 +193,9 @@ namespace tsl { return q.submit( [&](sycl::handler& h) { h.single_task([=]() [[intel::kernel_args_restrict]] { - return FunctorClass::apply(args...); + sycl::ext::intel::fpga_loop_fuse([&]{ + return FunctorClass::apply(args...); + }); }); } ).wait(); From f7c75a6d6a67ccae863186b6d9e5be6b60812dc4 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Mon, 4 Sep 2023 13:30:52 +0200 Subject: [PATCH 27/47] Fixed hadd for BIG simd-registers --- primitive_data/primitives/binary.yaml | 15 +++++++++++++ primitive_data/primitives/calc.yaml | 18 +++++++++++---- .../oneApiFPGA/include/tslOneAPIrt.hpp | 22 +++++++++++++++---- 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/primitive_data/primitives/binary.yaml b/primitive_data/primitives/binary.yaml index 0a9ed8ea..cacc1545 100644 --- a/primitive_data/primitives/binary.yaml +++ b/primitive_data/primitives/binary.yaml @@ -1071,6 +1071,21 @@ returns: ctype: "typename Vec::offset_base_register_type" description: "Vector containing leading zeros number." definitions: + - target_extension: ["avx512"] + ctype: ["uint32_t", "int32_t", "uint64_t", "int64_t"] + lscpu_flags: ["avx512f", "avx512cd"] + implementation: | + return _mm512_lzcnt_epi{{ intrin_tp[ctype][1] }}(data); + - target_extension: ["avx2"] + ctype: ["uint32_t", "int32_t", "uint64_t", "int64_t"] + lscpu_flags: ["avx2", "avx512cd", "avx512vl"] + implementation: | + return _mm256_lzcnt_epi{{ intrin_tp[ctype][1] }}(data); + - target_extension: ["sse"] + ctype: ["uint32_t", "int32_t", "uint64_t", "int64_t"] + lscpu_flags: ["sse", "avx512cd", "avx512vl"] + implementation: | + return _mm5_lzcnt_epi{{ intrin_tp[ctype][1] }}(data); - target_extension: ["oneAPIfpgaRTL"] ctype: ["uint32_t", "int32_t", "float"] lscpu_flags: ["oneAPIfpgaDev"] diff --git a/primitive_data/primitives/calc.yaml b/primitive_data/primitives/calc.yaml index 7194e14d..f3cfd9c3 100644 --- a/primitive_data/primitives/calc.yaml +++ b/primitive_data/primitives/calc.yaml @@ -544,16 +544,26 @@ definitions: lscpu_flags: ["oneAPIfpgaDev"] vector_length_agnostic: True implementation: | - return reducer::apply, Idof>>(value); + if constexpr(Vec::vector_element_count() < 256) { + return reducer::apply, Idof>>(value); + } else { + {% import 'core/definition_macro_helper_oneAPI.template' as helpers %} + {{ helpers.tree_like_reduce("Vec", "result_vec", "value", "+") }} + return result_vec[Vec::vector_element_count()-2]; + } - target_extension: "oneAPIfpgaRTL" ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "float", "uint64_t", "int64_t", "double"] lscpu_flags: ["oneAPIfpgaDev"] vector_length_agnostic: True specialization_comment: "This is for testing reasons only and does *not* use any RTL codes." implementation: | - {% import 'core/definition_macro_helper_oneAPI.template' as helpers %} - {{ helpers.tree_like_reduce("Vec", "result_vec", "value", "+") }} - return result_vec[Vec::vector_element_count()-2]; + if constexpr(Vec::vector_element_count() < 256) { + return reducer::apply, Idof>>(value); + } else { + {% import 'core/definition_macro_helper_oneAPI.template' as helpers %} + {{ helpers.tree_like_reduce("Vec", "result_vec", "value", "+") }} + return result_vec[Vec::vector_element_count()-2]; + } # - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] # ctype: ["uint64_t", "int64_t", "double"] # lscpu_flags: ["oneAPIfpgaDev"] diff --git a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp index 52b76719..82f0011a 100644 --- a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp +++ b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp @@ -10,6 +10,20 @@ #include #include +/** + * @brief This is a workaround to handle the fact that fpga_loop_fuse_independent was first introduced with oneAPI 2022.2 (compiler release 2022.1.0) + */ +namespace sycl { namespace ext { namespace intel { + template + auto tsl_fuse_loops_independent(_F f, int) -> decltype(fpga_loop_fuse_independent(f)){ + fpga_loop_fuse_independent(f); + } + template + auto tsl_fuse_loops_independent(_F f, double) -> void { + f(); + } +}}} + namespace tsl { namespace oneAPI { struct MEMORY_ON_HOST{}; @@ -180,9 +194,9 @@ namespace tsl { return q.submit( [&](sycl::handler& h) { h.single_task>([=]() [[intel::kernel_args_restrict]] { - sycl::ext::intel::fpga_loop_fuse([&] { + ::sycl::ext::intel::tsl_fuse_loops_independent([&] { return Fun::apply(args...); - }) + }, 0); }); } ).wait(); @@ -193,9 +207,9 @@ namespace tsl { return q.submit( [&](sycl::handler& h) { h.single_task([=]() [[intel::kernel_args_restrict]] { - sycl::ext::intel::fpga_loop_fuse([&]{ + sycl::ext::intel::tsl_fuse_loops_independent([&]{ return FunctorClass::apply(args...); - }); + }, 0); }); } ).wait(); From 6d523b0e6f9a8bd5da47aab732087ffab726fdbf Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 5 Sep 2023 17:01:44 +0200 Subject: [PATCH 28/47] Fixed typo --- primitive_data/primitives/binary.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/primitive_data/primitives/binary.yaml b/primitive_data/primitives/binary.yaml index cacc1545..cc5c1eb3 100644 --- a/primitive_data/primitives/binary.yaml +++ b/primitive_data/primitives/binary.yaml @@ -1085,7 +1085,7 @@ definitions: ctype: ["uint32_t", "int32_t", "uint64_t", "int64_t"] lscpu_flags: ["sse", "avx512cd", "avx512vl"] implementation: | - return _mm5_lzcnt_epi{{ intrin_tp[ctype][1] }}(data); + return _mm_lzcnt_epi{{ intrin_tp[ctype][1] }}(data); - target_extension: ["oneAPIfpgaRTL"] ctype: ["uint32_t", "int32_t", "float"] lscpu_flags: ["oneAPIfpgaDev"] From 760236625a87edccfc4117562ba81bd28d9844f3 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Wed, 6 Sep 2023 12:28:33 +0200 Subject: [PATCH 29/47] Changed terminate -> exception Instead of terminating (if a simd size is not supported), we throw an exception. --- supplementary/runtime/cpu/include/tslCPUrt.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/supplementary/runtime/cpu/include/tslCPUrt.hpp b/supplementary/runtime/cpu/include/tslCPUrt.hpp index 6a4cb18d..b71d9e03 100644 --- a/supplementary/runtime/cpu/include/tslCPUrt.hpp +++ b/supplementary/runtime/cpu/include/tslCPUrt.hpp @@ -68,7 +68,7 @@ namespace tsl { {% endfor %} else { std::cerr << "ERROR: unsupported vector length" << std::endl; - std::terminate(); + throw std::runtime_error("unsupported vector length"); } } }; From 213370227d9aa80a78f295d1de08951cb2ba5ae2 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Fri, 8 Sep 2023 16:28:11 +0200 Subject: [PATCH 30/47] Added loop fuse capability --- .../oneApiFPGA/include/tslOneAPIrt.hpp | 72 +++++++++++++++---- 1 file changed, 59 insertions(+), 13 deletions(-) diff --git a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp index 82f0011a..bc542fc2 100644 --- a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp +++ b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp @@ -13,19 +13,69 @@ /** * @brief This is a workaround to handle the fact that fpga_loop_fuse_independent was first introduced with oneAPI 2022.2 (compiler release 2022.1.0) */ -namespace sycl { namespace ext { namespace intel { - template - auto tsl_fuse_loops_independent(_F f, int) -> decltype(fpga_loop_fuse_independent(f)){ - fpga_loop_fuse_independent(f); +namespace sycl::ext::intel { + namespace tsl_helper_details { + struct incomplete_helper; } - template - auto tsl_fuse_loops_independent(_F f, double) -> void { + template + std::enable_if_t> fpga_loop_fuse(F f) = delete; + // Helper type to detect the presence of fpga_loop_fuse + template + struct tsl_helper_has_fpga_loop_fuse { + template + static auto test(int) -> decltype(fpga_loop_fuse(std::declval()), std::true_type{}); + template + static auto test(...) -> std::false_type; + + using type = decltype(test(0)); + static constexpr bool value = type::value; + }; + + template + __attribute__((always_inline)) inline auto tsl_helper_loop_fuse(Fun f, int) -> std::enable_if_t::value> { + fpga_loop_fuse(f); + } + template + __attribute__((always_inline)) inline void tsl_helper_loop_fuse(Fun f, double) { + f(); + } + + + template + std::enable_if_t> fpga_loop_fuse_independent(F f) = delete; + template + struct tsl_helper_has_fpga_loop_fuse_independent { + template + static auto test(int) -> decltype(fpga_loop_fuse_independent(std::declval()), std::true_type{}); + template + static auto test(...) -> std::false_type; + + using type = decltype(test(0)); + static constexpr bool value = type::value; + }; + + template + __attribute__((always_inline)) inline auto tsl_helper_loop_fuse_independent(Fun f, int) -> std::enable_if_t::value> { + fpga_loop_fuse(f); + } + template + __attribute__((always_inline)) inline void tsl_helper_loop_fuse_independent(Fun f, double) { f(); } -}}} +} namespace tsl { namespace oneAPI { + + template + __attribute__((always_inline)) inline void loop_fuse(Fun f) { + sycl::ext::intel::tsl_helper_loop_fuse(f, 0); + } + template + __attribute__((always_inline)) inline void loop_fuse_independent(Fun f) { + sycl::ext::intel::tsl_helper_loop_fuse_independent(f, 0); + } + struct MEMORY_ON_HOST{}; struct MEMORY_ON_DEVICE{}; @@ -194,9 +244,7 @@ namespace tsl { return q.submit( [&](sycl::handler& h) { h.single_task>([=]() [[intel::kernel_args_restrict]] { - ::sycl::ext::intel::tsl_fuse_loops_independent([&] { - return Fun::apply(args...); - }, 0); + return Fun::apply(args...); }); } ).wait(); @@ -207,9 +255,7 @@ namespace tsl { return q.submit( [&](sycl::handler& h) { h.single_task([=]() [[intel::kernel_args_restrict]] { - sycl::ext::intel::tsl_fuse_loops_independent([&]{ - return FunctorClass::apply(args...); - }, 0); + return FunctorClass::apply(args...); }); } ).wait(); From e5fc4dfcc387ccde03839c0d7c1707e0506b255d Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Fri, 8 Sep 2023 20:37:09 +0200 Subject: [PATCH 31/47] fixed typo --- supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp index bc542fc2..828c48c4 100644 --- a/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp +++ b/supplementary/runtime/oneApiFPGA/include/tslOneAPIrt.hpp @@ -15,10 +15,10 @@ */ namespace sycl::ext::intel { namespace tsl_helper_details { - struct incomplete_helper; + struct incomplete_helper_t; } template - std::enable_if_t> fpga_loop_fuse(F f) = delete; + std::enable_if_t> fpga_loop_fuse(F f) = delete; // Helper type to detect the presence of fpga_loop_fuse template struct tsl_helper_has_fpga_loop_fuse { @@ -42,7 +42,7 @@ namespace sycl::ext::intel { template - std::enable_if_t> fpga_loop_fuse_independent(F f) = delete; + std::enable_if_t> fpga_loop_fuse_independent(F f) = delete; template struct tsl_helper_has_fpga_loop_fuse_independent { template From 3845f49dc18eabd27a221271ab998200df2553e8 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Thu, 14 Sep 2023 11:06:08 +0200 Subject: [PATCH 32/47] Removed trailing whitespaces --- primitive_data/primitives/calc.yaml | 4 ++-- primitive_data/primitives/ls.yaml | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/primitive_data/primitives/calc.yaml b/primitive_data/primitives/calc.yaml index 40719cb0..7601a3ec 100644 --- a/primitive_data/primitives/calc.yaml +++ b/primitive_data/primitives/calc.yaml @@ -674,7 +674,7 @@ definitions: } else { {% import 'core/definition_macro_helper_oneAPI.template' as helpers %} {{ helpers.tree_like_reduce("Vec", "result_vec", "value", "+") }} - return result_vec[Vec::vector_element_count()-2]; + return result_vec[Vec::vector_element_count()-2]; } - target_extension: "oneAPIfpgaRTL" ctype: ["uint8_t", "int8_t", "uint16_t", "int16_t", "uint32_t", "int32_t", "float", "uint64_t", "int64_t", "double"] @@ -687,7 +687,7 @@ definitions: } else { {% import 'core/definition_macro_helper_oneAPI.template' as helpers %} {{ helpers.tree_like_reduce("Vec", "result_vec", "value", "+") }} - return result_vec[Vec::vector_element_count()-2]; + return result_vec[Vec::vector_element_count()-2]; } # - target_extension: ["oneAPIfpga", "oneAPIfpgaRTL"] # ctype: ["uint64_t", "int64_t", "double"] diff --git a/primitive_data/primitives/ls.yaml b/primitive_data/primitives/ls.yaml index 808aa10c..9c9810b4 100644 --- a/primitive_data/primitives/ls.yaml +++ b/primitive_data/primitives/ls.yaml @@ -976,7 +976,6 @@ definitions: ctype: [ "uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double" ] lscpu_flags: [ ] implementation: "return *reinterpret_cast(reinterpret_cast(memory) + index * N);" - ... --- primitive_name: "gather" From 42fa554937590277f1398cc3e3c5fcfc3b0d82f1 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 19 Sep 2023 17:40:19 +0200 Subject: [PATCH 33/47] Changed tsl_implementation_namespace - Added tsl_implementation_namespace to config - Renamed tsl_implementation_name to implementation_namespace in tsl_generator_schema.yaml --- generator/config/default_conf.yaml | 1 + generator/config/generator/tsl_generator_schema.yaml | 2 +- generator/core/tsl_config.py | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/generator/config/default_conf.yaml b/generator/config/default_conf.yaml index f1df6cc2..4e5edd73 100644 --- a/generator/config/default_conf.yaml +++ b/generator/config/default_conf.yaml @@ -1,6 +1,7 @@ --- configuration: namespace: "tsl" + tsl_implementation_namespace: "functors" header_file_extension: ".hpp" source_file_extension: ".cpp" root_path: "./generated_tsl" diff --git a/generator/config/generator/tsl_generator_schema.yaml b/generator/config/generator/tsl_generator_schema.yaml index 5d20406c..097bea96 100644 --- a/generator/config/generator/tsl_generator_schema.yaml +++ b/generator/config/generator/tsl_generator_schema.yaml @@ -294,7 +294,7 @@ primitive: type: "str" brief: "@TODO" example: "test_add" - tsl_implementation_namespace: + implementation_namespace: type: "str" default: "functors" brief: "Namespace for template specializations." diff --git a/generator/core/tsl_config.py b/generator/core/tsl_config.py index a5d12a48..9eba0894 100644 --- a/generator/core/tsl_config.py +++ b/generator/core/tsl_config.py @@ -234,6 +234,10 @@ def get_configuration_files_entry(self, entry_name: str) -> Any: @property def lib_namespace(self) -> str: return self.get_config_entry("namespace") + + @property + def implementation_namespace(self) -> str: + return self.get_config_entry("tsl_implementation_namespace") @property def lib_header_file_extension(self) -> str: From 25a731956aa0563a5a8e8cbbed24f0b8416df2ae Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 19 Sep 2023 17:40:44 +0200 Subject: [PATCH 34/47] Changed declaration template to use SFINAE --- .../core/primitive_declaration.template | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/generator/config/generator/tsl_templates/core/primitive_declaration.template b/generator/config/generator/tsl_templates/core/primitive_declaration.template index 401d4ccf..5552ec4d 100644 --- a/generator/config/generator/tsl_templates/core/primitive_declaration.template +++ b/generator/config/generator/tsl_templates/core/primitive_declaration.template @@ -82,22 +82,23 @@ namespace {{ tsl_implementation_namespace }} { template< VectorProcessingStyle {{ vector_name }}, {{ ns.additional_template_params_with_defaults}}ImplementationDegreeOfFreedom {{ idof_name }} = workaround{{ ns.parameter_pack_typenames_str }} - {% if functor_name != primitive_name %} - #ifdef {{ tsl_namespace|upper ~ '_' ~ tsl_implementation_namespace|upper ~ '_' ~ primitive_name|upper ~ '_STRUCT_DEFINED'}} - , typename std::enable_if_t< - !std::is_same_v< - typename {{ tsl_implementation_namespace }}::{{ primitive_name }}<{{ vector_name }}, {{ ns.additional_template_name }}{{ idof_name }}>::param_tuple_t, - typename {{ tsl_implementation_namespace }}::{{ functor_name }}<{{ vector_name }}, {{ ns.additional_template_name }}{{ idof_name }}>::param_tuple_t - >, - std::nullptr_t - > = nullptr - #endif - {% endif %} > {# If the primitive returns something, the caller has to capture the result. #} -{{ '[[nodiscard]] ' if returns['ctype'] != 'void' else '' }}{# If force_inline is set to True, we use TSL_FORCE_INLINE. #}{{ 'TSL_FORCE_INLINE ' if force_inline else '' }}{{ returns['ctype'] }} {{ primitive_name }}( +{{ '[[nodiscard]] ' if returns['ctype'] != 'void' else '' }}{# If force_inline is set to True, we use TSL_FORCE_INLINE. #}{{ 'TSL_FORCE_INLINE ' if force_inline else ' ' }}auto {{ primitive_name }}( {{ ns.full_qualified_parameters_str }} -) { +) -> std::enable_if_t< +{% if functor_name == primitive_name %} + tsl_functor_defined<{{ tsl_implementation_namespace }}::{{ functor_name }}<{{ vector_name }}, {{ ns.additional_template_name }}{{ idof_name }}>>::value, + {{ returns['ctype'] }} +{% else %} +#ifdef {{ tsl_namespace|upper ~ '_' ~ tsl_implementation_namespace|upper ~ '_' ~ primitive_name|upper ~ '_STRUCT_DEFINED'}} + !tsl_primitive_ambiguous<{{ tsl_implementation_namespace }}::{{ functor_name }}<{{ vector_name }}, {{ ns.additional_template_name }}{{ idof_name }}>, {{ tsl_implementation_namespace }}::{{ functor_name }}<{{ vector_name }}, {{ ns.additional_template_name }}{{ idof_name }}>>(), +#else + true, +#endif + {{ returns['ctype'] }} +{% endif %} +>{ {# If the function primitive implementation returns something we have to return it, otherwise we just call it. #} {# Call the actual implementation with all parameters. #} {{ 'return ' if returns['ctype'] != 'void' else '' }}{{ tsl_implementation_namespace }}::{{ functor_name }}<{{ vector_name }}, {{ ns.additional_template_name }}{{ idof_name }}>::apply( From 1491b8255450427767da26d7beff9703d4a94f0d Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 19 Sep 2023 17:41:58 +0200 Subject: [PATCH 35/47] Added Dependencygraph, added zero-copy versions for impl functions --- generator/core/ctrl/tsl_dependendies.py | 357 +++++++++++++++++++ generator/core/ctrl/tsl_lib.py | 15 +- generator/core/ctrl/tsl_libfile_generator.py | 16 + generator/core/model/tsl_primitive.py | 19 + generator/core/tsl_generator.py | 24 +- 5 files changed, 429 insertions(+), 2 deletions(-) create mode 100644 generator/core/ctrl/tsl_dependendies.py diff --git a/generator/core/ctrl/tsl_dependendies.py b/generator/core/ctrl/tsl_dependendies.py new file mode 100644 index 00000000..21de4c92 --- /dev/null +++ b/generator/core/ctrl/tsl_dependendies.py @@ -0,0 +1,357 @@ +from __future__ import annotations +from generator.core.tsl_config import config +from generator.core.ctrl.tsl_lib import TSLLib + +from typing import Generator, Dict, Iterator, Set, List, Tuple +import re +import networkx as nx +from dataclasses import dataclass +import pandas as pd + +class TSLPrimitiveRegex: + def __init__(self): + self.tsl_primitive_regex_parts = { + "namespace": rf'\s+({config.lib_namespace}::)?', + "functor_namespace": rf'\s+({config.lib_namespace}::)?({config.implementation_namespace}::)?', + "template": r'<[^;]*>', + "parameters": r'\(([^)]*)\)?', + } + + def primitive_name_to_regex(self, primitive_name: str) -> str: + return rf'({self.tsl_primitive_regex_parts["namespace"]}({primitive_name})\s*{self.tsl_primitive_regex_parts["template"]}\s*{self.tsl_primitive_regex_parts["parameters"]})' + + def functor_name_to_regex(self, primitive_name: str) -> str: + return rf'({self.tsl_primitive_regex_parts["functor_namespace"]}({primitive_name})\s*{self.tsl_primitive_regex_parts["template"]}\s*{self.tsl_primitive_regex_parts["parameters"]})' + + def primitives_to_regex(self, primitives: Generator[Tuple[str, str], None, None]) -> str: + primitives_regex_list = [] + for primitive_name, functor_name in primitives: + primitives_regex_list.append(self.primitive_name_to_regex(primitive_name)) + if functor_name is not None and functor_name != "" and functor_name != primitive_name: + primitives_regex_list.append(self.functor_name_to_regex(functor_name)) + for x in primitives_regex_list: + print(x) + return rf'({"|".join(primitives_regex_list)})' + + def get_primitive_name_from_match(self, match, group_count: int) -> str: + found_count = 0 + for i in range(group_count, 0, -1): + if match.group(i) is not None: + found_count += 1 + if found_count == 2: + return match.group(i) + +class TSLDependencyGraphDeprecated: + def __init__(self, tsl_lib: TSLLib): + self.__tsl_lib = tsl_lib + self.__primitive_regex_helper = TSLPrimitiveRegex() + self.__primitive_regex = re.compile(self.__primitive_regex_helper.primitives_to_regex(tsl_lib.known_primitives_names_and_functor)) + print(f"REGEX: {self.__primitive_regex_helper.primitives_to_regex(tsl_lib.known_primitives_names_and_functor)}") + self.__primitive_to_class_dict: Dict[str, str] = {} + for class_name, primitive in tsl_lib.known_primitives: + self.__primitive_to_class_dict[primitive.declaration.name] = class_name + if primitive.declaration.functor_name is not None and primitive.declaration.functor_name != "" and primitive.declaration.functor_name != primitive.declaration.name: + self.__primitive_to_class_dict[primitive.declaration.functor_name] = class_name + + + def get_dependencies(self, implementation_str: str) -> List[str]: + dependencies: Set[str] = set() + for match in self.__primitive_regex.finditer(implementation_str): + required_primitive = self.__primitive_regex_helper.get_primitive_name_from_match(match, self.__primitive_regex.groups) + required_class = self.__primitive_to_class_dict[required_primitive] + dependencies.add(required_class) + return list(dependencies) + + def sort_tsl_classes(self, primitive_generator_implementation_fun_name: str): + class_set: Set[str] = {c.name for c in self.__tsl_lib.primitive_class_set} + dependency_graph: nx.DiGraph = nx.DiGraph() + for class_name, primitive in self.__tsl_lib.known_primitives: + fun = getattr(primitive, primitive_generator_implementation_fun_name) + for implementation_str in fun(False): + for match in self.__primitive_regex.finditer(implementation_str): + required_primitive = self.__primitive_regex_helper.get_primitive_name_from_match(match, self.__primitive_regex.groups) + print(f"primitive: {primitive.declaration.name} - required_primitive: {required_primitive}") + + required_class = self.__primitive_to_class_dict[required_primitive] + if (required_class != class_name) and (required_primitive != primitive.declaration.name): + dependency_graph.add_nodes_from([class_name, required_class]) + dependency_graph.add_edge(required_class, class_name) + class_set.discard(class_name) + class_set.discard(required_class) + print(f"{class_name}::{primitive.declaration.name} requires {required_class}::{required_primitive}") + try: + order = nx.topological_sort(dependency_graph) + except nx.NetworkXUnfeasible: + print("Cyclic dependency detected. Please fix this first.") + exit(1) + for class_name in class_set: + yield class_name + for primitive_class in order: + yield primitive_class + + +class TSLDependencyGraph: + @dataclass(order=True, unsafe_hash=True, frozen=True) + class PrimitiveClassNode: + name: str + type: str = "class" + size: int = 10 + def __str__(self): + return f"{self.name}" + def __repr__(self): + return str(self) + def id(self): + return self.name + def attributes(self): + return {"name": self.name, "type": self.type, "size": self.size} + @dataclass(order=True, unsafe_hash=True, frozen=True) + class PrimitiveNode: + name: str + type: str = "primitive" + size: int = 5 + def __str__(self): + return f"{self.name}" + def __repr__(self): + return str(self) + def id(self): + return self.name + def attributes(self): + return {"name": self.name, "type": self.type, "size": self.size} + @dataclass(order=True, unsafe_hash=True, frozen=True) + class PrimitiveTestNode: + name: str + type: str = "test" + size: int = 3 + def __str__(self): + return f"{self.name}" + def __repr__(self): + return str(self) + def id(self): + return self.name + def attributes(self): + return {"name": self.name, "type": self.type, "size": self.size} + + @property + def graph(self) -> nx.DiGraph: + return self.__dependency_graph + + def __init__(self, tsl_lib: TSLLib) -> None: + self.__tsl_lib = tsl_lib + self.__dependency_graph: nx.DiGraph = nx.DiGraph() + for primitive_class in self.__tsl_lib.primitive_class_set: + self.__dependency_graph.add_node(self.PrimitiveClassNode(primitive_class.name)) + for class_name, primitive in self.__tsl_lib.known_primitives: + self.__dependency_graph.add_node(self.PrimitiveNode(primitive.declaration.functor_name)) + self.__dependency_graph.add_edge(self.PrimitiveNode(primitive.declaration.functor_name), self.PrimitiveClassNode(class_name), label="part of") + + self.__primitive_regex_str: str = rf'(? None: + for _, primitive in self.__tsl_lib.known_primitives: + for test_name, implementation_str in primitive.get_tests_implementations(False): + fq_test_name = f"{primitive.declaration.functor_name}::{test_name}" + self.__dependency_graph.add_node(self.PrimitiveTestNode(fq_test_name)) + self.__dependency_graph.add_edge(self.PrimitiveTestNode(fq_test_name), self.PrimitiveNode(primitive.declaration.functor_name), label="test of") + for match in self.__primitive_regex.finditer(implementation_str): + required_primitive = match.group(2) + if required_primitive != primitive.declaration.functor_name: + self.__dependency_graph.add_edge(self.PrimitiveNode(required_primitive), self.PrimitiveTestNode(fq_test_name), label="depends on") + + def __has_primitive(self, primitive_name: str) -> bool: + return self.PrimitiveNode(primitive_name) in self.__dependency_graph + + def __traverse_from_primitives(self, primitives_names: List[str], node_types_of_interest: list, reversed: bool, self_contained: bool = False) -> Set[str]: + unknown_primitives = list(filter(lambda primitive_name: not self.__has_primitive(primitive_name), primitives_names)) + if len(unknown_primitives) > 0: + raise Exception(f"Primitives {', '.join(map(str, unknown_primitives))} not found in dependency graph.") + else: + self_set = set(primitives_names) if self_contained else set() + #edge[0]: source, edge[1]: target + return {*(primitive_name for current_primitive_name in primitives_names for primitive_name in + map( + lambda edge: edge[1].name, + filter( + lambda edge: any(isinstance(edge[1], node_type) for node_type in node_types_of_interest), + nx.bfs_edges(self.__dependency_graph, self.PrimitiveNode(current_primitive_name), reverse=reversed))) + ), *self_set} + + def is_acyclic(self) -> bool: + return nx.is_directed_acyclic_graph(self.__dependency_graph) + + def get_cycles_as_str(self) -> List[str]: + return list(map(lambda list_of_nodes: " -> ".join(map(lambda node: node.name, list_of_nodes)), nx.simple_cycles(self.__dependency_graph))) + + def get_direct_predecessor_names(self, node, node_types_of_interest: list) -> List[str]: + return list(map(lambda edge: edge[0].name, filter(lambda edge: any(isinstance(edge[0], node_type) for node_type in node_types_of_interest), self.__dependency_graph.in_edges(node)))) + + def get_primitive_nodes(self) -> List[TSLDependencyGraph.PrimitiveNode]: + return list(filter(lambda node: isinstance(node, self.PrimitiveNode), self.__dependency_graph.nodes)) + + def get_primitives_count(self) -> int: + return sum(1 for _ in filter(lambda node: isinstance(node, self.PrimitiveNode), self.__dependency_graph.nodes)) + + def get_class_nodes(self) -> List[TSLDependencyGraph.PrimitiveClassNode]: + return list(filter(lambda node: isinstance(node, self.PrimitiveClassNode), self.__dependency_graph.nodes)) + + def class_nodes(self) -> Iterator[TSLDependencyGraph.PrimitiveClassNode]: + return filter(lambda node: isinstance(node, self.PrimitiveClassNode), self.__dependency_graph.nodes) + + def get_classes_count(self) -> int: + return sum(1 for _ in filter(lambda node: isinstance(node, self.PrimitiveClassNode), self.__dependency_graph.nodes)) + + def get_test_nodes(self) -> List[TSLDependencyGraph.PrimitiveTestNode]: + return list(filter(lambda node: isinstance(node, self.PrimitiveTestNode), self.__dependency_graph.nodes)) + + def get_tests_count(self) -> int: + return sum(1 for _ in filter(lambda node: isinstance(node, self.PrimitiveTestNode), self.__dependency_graph.nodes)) + + def get_required_primitives(self, primitive_names: str|List[str], self_contained: bool = False) -> List[str]: + if isinstance(primitive_names, str): + primitive_names = primitive_names.split(" ") + return list(self.__traverse_from_primitives(primitive_names, [self.PrimitiveNode], True, self_contained)) + + def get_dependent_primitives(self, primitive_names: str|List[str], self_contained: bool = False) -> List[str]: + if isinstance(primitive_names, str): + primitive_names = primitive_names.split(" ") + return list(self.__traverse_from_primitives(primitive_names, [self.PrimitiveNode], False, self_contained)) + + def get_associated_class(self, primitive_name) -> str: + if not self.__has_primitive(primitive_name): + raise Exception(f"Primitive {primitive_name} not found in dependency graph.") + else: + for successor in self.__dependency_graph.successors(self.PrimitiveNode(primitive_name)): + if isinstance(successor, self.PrimitiveClassNode): + return successor.name + raise Exception(f"Primitive {primitive_name} has no associated class.") + + def get_required_classes(self, primitive_names: str|List[str]) -> List[str]: + required_primitives = self.get_required_primitives(primitive_names, True) + return list({ + self.get_associated_class(required_primitive) for required_primitive in required_primitives + }) + + def get_dependent_classes(self, primitive_names: str|List[str]) -> List[str]: + dependent_primitives = self.get_dependent_primitives(primitive_names, True) + return list({ + self.get_associated_class(dependent_primitive) for dependent_primitive in dependent_primitives + }) + + def sorted_classes(self) -> Generator[str, None, None]: + class_graph = nx.DiGraph() + for cls in self.class_nodes(): + class_graph.add_node(cls.name) + for predecessor in self.get_required_classes(self.get_direct_predecessor_names(cls, [self.PrimitiveNode])): + if predecessor != cls.name: + if False: + if predecessor == "calc" and cls.name == "convert": + for x in self.get_direct_predecessor_names(cls, [self.PrimitiveNode]): + for requc in self.get_required_classes([x]): + print(f"{x} ---> {requc}") + class_graph.add_edge(predecessor, cls.name) + try: + ordered_class_graph = nx.topological_sort(class_graph) + except nx.NetworkXUnfeasible: + print("Unable to sort class graph.") + exit(1) + for cls in ordered_class_graph: + yield cls + + def get_required_tests(self, primitive_names: str|List[str]) -> List[str]: + if isinstance(primitive_names, str): + primitive_names = primitive_names.split(" ") + return list(self.__traverse_from_primitives(primitive_names, [self.PrimitiveTestNode], True)) + + def get_dependent_tests(self, primitive_names: str|List[str]) -> List[str]: + if isinstance(primitive_names, str): + primitive_names = primitive_names.split(" ") + return list(self.__traverse_from_primitives(primitive_names, [self.PrimitiveTestNode], False)) + + def tested_primitive_count(self) -> int: + nodes = self.get_primitive_nodes() + result = 0 + for primitive_node in nodes: + test_cases = self.get_direct_predecessor_names(primitive_node, [self.PrimitiveTestNode]) + if len(test_cases) > 0: + result += 1 + return result + + def missing_tests(self) -> Generator[str, None, None]: + nodes = self.get_primitive_nodes() + for primitive_node in nodes: + test_cases = self.get_direct_predecessor_names(primitive_node, [self.PrimitiveTestNode]) + if len(test_cases) == 0: + yield primitive_node.name + + def as_str(self, include_tests: bool = False) -> str: + class_count = self.get_classes_count() + primitives_count = self.get_primitives_count() + result = f"""TSL - Summary: + - # Primitive Classes: {class_count} + - # Primitives: {primitives_count}""" + if include_tests: + tests_count = self.get_tests_count() + tested_primitive_count = self.tested_primitive_count() + test_coverage = tested_primitive_count / primitives_count + average_tests_per_primitive = (tests_count / primitives_count) + result = f"""{result} + - # Tests: {tests_count} + - Primitives w/ Tests: {tested_primitive_count} + - Primitives w/o Tests: {primitives_count - tested_primitive_count} + - Test Coverage: {test_coverage * 100:.2f}% + - Avg. Tests/Primitive: {average_tests_per_primitive:.2f}""" + return result + + def draw(self, out_name: str = "dependency_graph"): + from networkx.drawing.nx_agraph import to_agraph + g = to_agraph(self.__dependency_graph) + # pos = nx.nx_agraph.graphviz_layout(self.__graph) + g.layout() + config.generation_out_path.joinpath(out_name).with_suffix(".png").parent.mkdir(parents=True, exist_ok=True) + g.draw(config.generation_out_path.joinpath(out_name).with_suffix(".png"), prog='dot') + + def to_pandas(self) -> Tuple[pd.DataFrame, pd.DataFrame]: + edge_list = [] + node_list = [] + # Iterate over edges and nodes in the dependency graph + for source, target in self.__dependency_graph.edges(): + source_node = source + target_node = target + edge_data = { + 'from': source_node.id(), + 'to': target_node.id(), + 'weight': 1, + 'strength': "medium", + **self.__dependency_graph.get_edge_data(source, target) + } + edge_list.append(edge_data) + for node in self.__dependency_graph.nodes(): + node_data = { + 'id': node.id(), + **node.attributes() + } + node_list.append(node_data) + edge_df = pd.DataFrame(edge_list) + node_df = pd.DataFrame(node_list) + return edge_df, node_df + + def to_json(self, out_name: str = "dependency_graph") -> None: + edge_df, node_df = self.to_pandas() + edge_df.to_json(config.generation_out_path.joinpath(out_name).with_suffix(".edges.json")) + node_df.to_json(config.generation_out_path.joinpath(out_name).with_suffix(".nodes.json")) + + def to_jaal(self) -> None: + from jaal import Jaal + edge_df, node_df = self.to_pandas() + port=8050 + while True: + try: + Jaal(edge_df, node_df).plot(directed=True,port=port) + except: + port+=1 diff --git a/generator/core/ctrl/tsl_lib.py b/generator/core/ctrl/tsl_lib.py index 90e1c1b8..a31762f8 100644 --- a/generator/core/ctrl/tsl_lib.py +++ b/generator/core/ctrl/tsl_lib.py @@ -6,7 +6,7 @@ from generator.core.tsl_config import config from generator.core.model.tsl_extension import TSLExtensionSet, TSLExtension -from generator.core.model.tsl_primitive import TSLPrimitiveClassSet +from generator.core.model.tsl_primitive import TSLPrimitiveClassSet, TSLPrimitive from generator.utils.log_utils import LogInit @@ -103,6 +103,19 @@ def copy_relevant_supplementary_files(self) -> None: runtime_header_output.write(template.render(runtime_relevant_data_dict)) # shutil.copy(fpath.resolve(), supplementary_root_path.joinpath(fpath).resolve(), follow_symlinks=True) + @property + def known_primitives_name(self) -> Generator[str, None, None]: + for primitive_class in self.__primitive_class_set: + for primitive in primitive_class: + yield primitive.declaration.name + + @property + def known_primitives(self) -> Generator[Tuple[str, TSLPrimitive], None, None]: + for primitive_class in self.__primitive_class_set: + for primitive in primitive_class: + yield primitive_class.name, primitive + + @LogInit() def __init__(self, extension_set: TSLExtensionSet, primitive_class_set: TSLPrimitiveClassSet) -> None: self.__extension_set = extension_set diff --git a/generator/core/ctrl/tsl_libfile_generator.py b/generator/core/ctrl/tsl_libfile_generator.py index be49bfca..b79e36d7 100644 --- a/generator/core/ctrl/tsl_libfile_generator.py +++ b/generator/core/ctrl/tsl_libfile_generator.py @@ -15,6 +15,7 @@ from generator.utils.file_utils import strip_common_path_prefix from generator.utils.log_utils import LogInit from generator.utils.yaml_utils import yaml_load, YamlDataType +from generator.core.ctrl.tsl_dependendies import TSLDependencyGraph @@ -149,6 +150,16 @@ def __create_static_header_files(self) -> None: tsl_file.add_code_to_be_rendered(implementation) self.__static_files.append(tsl_file) + def __sort_header_files(self, sorted_keys: List[str], header_files: List[TSLHeaderFile]) -> List[TSLHeaderFile]: + result: List[TSLHeaderFile] = [] + for key in sorted_keys: + for header_file in header_files: + if header_file.file_name.stem.startswith(key): + result.append(header_file) + if len(result) != len(header_files): + raise Exception("Could not sort header files.") + return result + @LogInit() def __init__(self, lib: TSLLib) -> None: self.__static_files: List[TSLHeaderFile] = [] @@ -160,10 +171,15 @@ def __init__(self, lib: TSLLib) -> None: self.__create_primitive_header_files(lib.extension_set, lib.primitive_class_set) self.__create_static_header_files() + # dep_graph = TSLDependencyGraph(lib) + # print("Checking implementation dependencies:") + # ordered_primitive_classes = list(dep_graph.sort_tsl_classes("get_implementations")) + generated_files_root: TSLHeaderFile = TSLHeaderFile.create_from_dict(config.lib_generated_files_root_header, {}) for extension_file in self.extension_files: generated_files_root.add_file_include(extension_file) + # for primitive_declaration in self.__sort_header_files(ordered_primitive_classes, list(self.primitive_declaration_files)): for primitive_declaration in self.primitive_declaration_files: generated_files_root.add_file_include(primitive_declaration) for primitive_definition in self.primitive_definition_files: diff --git a/generator/core/model/tsl_primitive.py b/generator/core/model/tsl_primitive.py index 9dda5a75..ef3c3d39 100644 --- a/generator/core/model/tsl_primitive.py +++ b/generator/core/model/tsl_primitive.py @@ -19,6 +19,8 @@ class Declaration: def __init__(self, data_dict: dict): self.__data_dict = data_dict self.__data_dict["tsl_namespace"] = config.get_config_entry("namespace") + if "tsl_implementation_namespace" not in self.__data_dict: + self.__data_dict["tsl_implementation_namespace"] = config.implementation_namespace if len(data_dict["functor_name"]) == 0: self.__data_dict["functor_name"] = data_dict["primitive_name"] self.log(logging.INFO, f"Created Primitive Declaration {self.__data_dict['primitive_name']}") @@ -301,6 +303,23 @@ def tests(self) -> Generator[YamlDataType, None, None]: if self.has_test(): for test in self.declaration.data["testing"]: yield copy.deepcopy(test) + + def get_tests_implementations(self, copy: bool = True) -> Generator[Tuple[str, str], None, None]: + if self.has_test(): + for test in self.declaration.data["testing"]: + if "implementation" in test: + if copy: + yield test["test_name"], copy.deepcopy(test["implementation"]) + else: + yield test["test_name"], test["implementation"] + + def get_implementations(self, copy: bool = True) -> Generator[str, None, None]: + if copy: + for definition in self.definitions: + yield copy.deepcopy(definition.data["implementation"]) + else: + for definition in self.definitions: + yield definition.data["implementation"] @staticmethod @requirement(data_dict="NotNone;dict") diff --git a/generator/core/tsl_generator.py b/generator/core/tsl_generator.py index 9a884890..45af16bb 100644 --- a/generator/core/tsl_generator.py +++ b/generator/core/tsl_generator.py @@ -10,6 +10,7 @@ from generator.core.model.tsl_extension import TSLExtensionSet from generator.core.model.tsl_primitive import TSLPrimitiveClass, TSLPrimitiveClassSet from generator.core.tsl_config import config +from generator.core.ctrl.tsl_dependendies import TSLDependencyGraph from generator.expansions.tsl_readme_md import create_readme from generator.expansions.tsl_translation_unit import TSLTranslationUnitContainer from generator.expansions.tsl_unit_test import TSLTestGenerator @@ -138,6 +139,7 @@ def generate(self, relevant_hardware_flags: List[str] = None, relevant_primitive implregex_list.append( primitive.declaration.functor_name ) implregex_string = f'(? Date: Tue, 19 Sep 2023 17:42:27 +0200 Subject: [PATCH 36/47] Added helper classes to better support SFINAE --- .../static_files/core/utils/preprocessor.yaml | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/generator/static_files/core/utils/preprocessor.yaml b/generator/static_files/core/utils/preprocessor.yaml index 3da0a3e4..5da07c05 100755 --- a/generator/static_files/core/utils/preprocessor.yaml +++ b/generator/static_files/core/utils/preprocessor.yaml @@ -18,4 +18,32 @@ implementations: #ifndef TSL_CVAL # define TSL_CVAL(type, value) std::integral_constant{} #endif + - | + template + struct class_declared : std::false_type {}; + template + struct class_declared : std::true_type {}; + - | + template + struct tsl_functor_defined { + template + static auto test(int) -> decltype(CurrentHelperStruct::apply, std::true_type{}); + template + static auto test(...) -> std::false_type; + using type = decltype(test(0)); + static constexpr bool value = type::value; + }; + - | + template + constexpr bool tsl_primitive_ambiguous() { + if constexpr ((class_declared::value)) { + if constexpr ((tsl_functor_defined::value) && (tsl_functor_defined::value)) { + return std::is_same_v; + } else { + return false; + } + } else { + return false; + } + } ... From 8dcae4ffde080171a52e1eecfab7e4865c761dbc Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 19 Sep 2023 17:42:55 +0200 Subject: [PATCH 37/47] Corrected spelling --- primitive_data/primitives/convert.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/primitive_data/primitives/convert.yaml b/primitive_data/primitives/convert.yaml index 210a6a4b..28a69147 100644 --- a/primitive_data/primitives/convert.yaml +++ b/primitive_data/primitives/convert.yaml @@ -380,9 +380,9 @@ definitions: __m256i magic_i_all = _mm256_set1_epi64x(0x4530000080100000); /* 2^84 + 2^63 + 2^52 encoded as floating-point */ __m256d magic_d_all = _mm256_castsi256_pd(magic_i_all); - __m256i v_lo = _mm256_blend_epi32(magic_i_lo, data, 0b01010101); /* Blend the 32 lowest significant bits of v with magic_int_lo */ + __m256i v_lo = _mm256_blend_epi32(magic_i_lo, data, 0b01010101); /* Blends the 32 lowest significant bits of v with magic_int_lo */ __m256i v_hi = _mm256_srli_epi64(data, 32); /* Extract the 32 most significant bits of v */ - v_hi = _mm256_xor_si256(v_hi, magic_i_hi32); /* Flip the msb of v_hi and blend with 0x45300000 */ + v_hi = _mm256_xor_si256(v_hi, magic_i_hi32); /* Flip the msb of v_hi and blends with 0x45300000 */ __m256d v_hi_dbl = _mm256_sub_pd(_mm256_castsi256_pd(v_hi), magic_d_all); /* Compute in double precision: */ return _mm256_add_pd(v_hi_dbl, _mm256_castsi256_pd(v_lo)); /* (v_hi - magic_d_all) + v_lo Do not assume associativity of floating point addition !! */ - target_extension: "avx2" @@ -397,9 +397,9 @@ definitions: __m256i magic_i_all = _mm256_set1_epi64x(0x4530000000100000); /* 2^84 + 2^52 encoded as floating-point */ __m256d magic_d_all = _mm256_castsi256_pd(magic_i_all); - __m256i v_lo = _mm256_blend_epi32(magic_i_lo, data, 0b01010101); /* Blend the 32 lowest significant bits of v with magic_int_lo */ + __m256i v_lo = _mm256_blend_epi32(magic_i_lo, data, 0b01010101); /* Blends the 32 lowest significant bits of v with magic_int_lo */ __m256i v_hi = _mm256_srli_epi64(data, 32); /* Extract the 32 most significant bits of v */ - v_hi = _mm256_xor_si256(v_hi, magic_i_hi32); /* Blend v_hi with 0x45300000 */ + v_hi = _mm256_xor_si256(v_hi, magic_i_hi32); /* Blends v_hi with 0x45300000 */ __m256d v_hi_dbl = _mm256_sub_pd(_mm256_castsi256_pd(v_hi), magic_d_all); /* Compute in double precision: */ __m256d result = _mm256_add_pd(v_hi_dbl, _mm256_castsi256_pd(v_lo)); /* (v_hi - magic_d_all) + v_lo Do not assume associativity of floating point addition !! */ return result; From b253399743626cf475f37df6cf97bbe95a18d695 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 19 Sep 2023 17:43:16 +0200 Subject: [PATCH 38/47] Corrected lscpu_flag for sse4.1 --- primitive_data/primitives/mask.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/primitive_data/primitives/mask.yaml b/primitive_data/primitives/mask.yaml index 77bf34cb..05af8d90 100644 --- a/primitive_data/primitives/mask.yaml +++ b/primitive_data/primitives/mask.yaml @@ -486,7 +486,7 @@ definitions: return _mm_castsi128_ps(_mm_cmpeq_epi32(anded_vec, and_vec)); - target_extension: ["sse"] ctype: ["uint64_t", "int64_t"] - lscpu_flags: ["sse2", "sse4.1"] + lscpu_flags: ["sse2", "sse4_1"] implementation: | auto const and_vec = _mm_set_epi64x(0x2, 0x1); auto const data_vec = _mm_set1_epi64x(mask); @@ -494,7 +494,7 @@ definitions: return _mm_cmpeq_epi64(anded_vec, and_vec); - target_extension: ["sse"] ctype: ["double"] - lscpu_flags: ["sse2", "sse4.1"] + lscpu_flags: ["sse2", "sse4_1"] implementation: | auto const and_vec = _mm_set_epi64x(0x2, 0x1); auto const data_vec = _mm_set1_epi64x(mask); From 1fbd6b1f9ca0b8e528c261ac498f8a42c72666a9 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Tue, 19 Sep 2023 17:43:29 +0200 Subject: [PATCH 39/47] Changed test to avoid cyclic dependency --- primitive_data/primitives/ls.yaml | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/primitive_data/primitives/ls.yaml b/primitive_data/primitives/ls.yaml index 9c9810b4..a99310db 100644 --- a/primitive_data/primitives/ls.yaml +++ b/primitive_data/primitives/ls.yaml @@ -523,11 +523,20 @@ returns: description: "Vector containing 0 in all lanes." testing: - test_name: "default" - requires: ["hor"] + requires: ["set1"] implementation: | - auto zero_vec = set_zero(); - auto result = hor(zero_vec); - return !((bool) result); + using T = typename Vec::base_type; + testing::test_memory_helper_t test_helper{Vec::vector_element_count(), Vec::vector_element_count(), false}; + bool allOk = true; + auto reference_result_ptr = test_helper.result_ref(); + auto test_result_ptr = test_helper.result_target(); + auto vec = set_zero(); + storeu(test_result_ptr, vec); + for (size_t i = 0; i < Vec::vector_element_count(); ++i) { + reference_result_ptr[i] = (T)0; + } + test_helper.synchronize(); + return test_helper.validate(); definitions: #INTEL - AVX512 - target_extension: "avx512" @@ -1527,7 +1536,7 @@ definitions: lscpu_flags: [] implementation: | if(mask){ - *memory = tsl::extract_value(data); + *memory = data; } ... --- From eae03e63a8d89aaffd06818d2edb0434c1e7e3e6 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Wed, 20 Sep 2023 16:49:21 +0200 Subject: [PATCH 40/47] Fixed DependencyGraph + include order --- generator/core/ctrl/tsl_dependencies.py | 321 +++++++++++++++++ generator/core/ctrl/tsl_dependendies.py | 357 ------------------- generator/core/ctrl/tsl_libfile_generator.py | 12 +- generator/core/tsl_generator.py | 27 +- generator/expansions/tsl_unit_test.py | 18 +- 5 files changed, 351 insertions(+), 384 deletions(-) create mode 100644 generator/core/ctrl/tsl_dependencies.py delete mode 100644 generator/core/ctrl/tsl_dependendies.py diff --git a/generator/core/ctrl/tsl_dependencies.py b/generator/core/ctrl/tsl_dependencies.py new file mode 100644 index 00000000..bcb73aea --- /dev/null +++ b/generator/core/ctrl/tsl_dependencies.py @@ -0,0 +1,321 @@ +from __future__ import annotations +from generator.core.tsl_config import config +from generator.core.ctrl.tsl_lib import TSLLib + +from typing import Generator, Dict, Iterator, Set, List, Tuple, NewType, Union +import re +import networkx as nx +from dataclasses import dataclass +import pandas as pd + + +class TSLDependencyGraph: + @dataclass(order=True, unsafe_hash=True, frozen=True) + class PrimitiveClassNode: + name: str + type: str = "class" + size: int = 10 + def __str__(self): + return f"{self.name}" + def __repr__(self): + return str(self) + def id(self): + return self.name + def attributes(self): + return {"name": self.name, "type": self.type, "size": self.size} + @dataclass(order=True, unsafe_hash=True, frozen=True) + class PrimitiveNode: + name: str + type: str = "primitive" + size: int = 5 + def __str__(self): + return f"{self.name}" + def __repr__(self): + return str(self) + def id(self): + return self.name + def attributes(self): + return {"name": self.name, "type": self.type, "size": self.size} + @dataclass(order=True, unsafe_hash=True, frozen=True) + class PrimitiveTestNode: + name: str + type: str = "test" + size: int = 3 + def __str__(self): + return f"{self.name}" + def __repr__(self): + return str(self) + def id(self): + return self.name + def attributes(self): + return {"name": self.name, "type": self.type, "size": self.size} + + NodeType = PrimitiveClassNode | PrimitiveNode | PrimitiveTestNode + + @property + def graph(self) -> nx.DiGraph: + return self.__dependency_graph + + def __init__(self, tsl_lib: TSLLib) -> None: + self.__tsl_lib = tsl_lib + self.__dependency_graph: nx.DiGraph = nx.DiGraph() + for primitive_class in self.__tsl_lib.primitive_class_set: + self.__dependency_graph.add_node(self.PrimitiveClassNode(primitive_class.name)) + for class_name, primitive in self.__tsl_lib.known_primitives: + self.__dependency_graph.add_node(TSLDependencyGraph.PrimitiveNode(primitive.declaration.functor_name)) + self.__dependency_graph.add_edge(TSLDependencyGraph.PrimitiveNode(primitive.declaration.functor_name), self.PrimitiveClassNode(class_name), label="part of") + + self.__primitive_regex_str: str = rf'(? None: + for _, primitive in self.__tsl_lib.known_primitives: + for test_name, implementation_str in primitive.get_tests_implementations(False): + fq_test_name = f"{primitive.declaration.functor_name}::{test_name}" + self.__dependency_graph.add_node(self.PrimitiveTestNode(fq_test_name)) + self.__dependency_graph.add_edge(self.PrimitiveTestNode(fq_test_name), TSLDependencyGraph.PrimitiveNode(primitive.declaration.functor_name), label="test of") + for match in self.__primitive_regex.finditer(implementation_str): + required_primitive = match.group(2) + if required_primitive != primitive.declaration.functor_name: + self.__dependency_graph.add_edge(TSLDependencyGraph.PrimitiveNode(required_primitive), self.PrimitiveTestNode(fq_test_name), label="depends on") + + def find(self, name: str) -> NodeType|None: + for node_type in TSLDependencyGraph.NodeType.__args__: + node = node_type(name) + if node in self.__dependency_graph: + return node + return None + + def traverse_by_type(self, nodes: List[NodeType], node_types_of_interest: list, reversed:bool, self_contained: bool = False) -> Generator[NodeType, None, None]: + for current_node in nodes: + if self_contained: + yield current_node + for edge in filter(lambda edge: any(isinstance(edge[1], node_type) for node_type in node_types_of_interest), nx.bfs_edges(self.__dependency_graph, current_node, reverse=reversed)): + yield edge[1] + + def traverse(self, nodes: List[NodeType], reversed: bool, self_contained: bool = False) -> Generator[NodeType, None, None]: + for current_node in nodes: + if self_contained: + yield current_node + for edge in nx.bfs_edges(self.__dependency_graph, current_node, reverse=reversed): + yield edge[1] + + def predecessors_by_type(self, node: NodeType, node_types_of_interest: list) -> Generator[NodeType, None, None]: + for edge in filter(lambda edge: any(isinstance(edge[0], node_type) for node_type in node_types_of_interest), self.__dependency_graph.in_edges(node)): + yield edge[0] + + def first_predecessor_by_type(self, node: NodeType, node_types_of_interest: list) -> NodeType|None: + for edge in filter(lambda edge: any(isinstance(edge[0], node_type) for node_type in node_types_of_interest), self.__dependency_graph.in_edges(node)): + return edge[0] + return None + + def successors_by_type(self, node: NodeType, node_types_of_interest: list) -> Generator[NodeType, None, None]: + for edge in filter(lambda edge: any(isinstance(edge[1], node_type) for node_type in node_types_of_interest), self.__dependency_graph.out_edges(node)): + yield edge[1] + + def first_successor_by_type(self, node: NodeType, node_types_of_interest: list) -> NodeType|None: + for edge in filter(lambda edge: any(isinstance(edge[1], node_type) for node_type in node_types_of_interest), self.__dependency_graph.out_edges(node)): + return edge[1] + return None + + def nodes_by_type(self, node_types_of_interest: list) -> Generator[NodeType, None, None]: + yield from filter(lambda node: any(isinstance(node, node_type) for node_type in node_types_of_interest), self.__dependency_graph.nodes) + + def is_acyclic(self) -> bool: + return nx.is_directed_acyclic_graph(self.__dependency_graph) + + def get_cycles_as_str(self) -> List[str]: + return list(map(lambda list_of_nodes: " -> ".join(map(lambda node: node.name, list_of_nodes)), nx.simple_cycles(self.__dependency_graph))) + + def class_nodes(self) -> Generator[TSLDependencyGraph.PrimitiveClassNode, None, None]: + for primitive_class in self.__tsl_lib.primitive_class_set: + yield self.PrimitiveClassNode(primitive_class.name) + + def get_required_primitives(self, node: NodeType, self_contained: bool = False) -> Generator[TSLDependencyGraph.PrimitiveNode, None, None]: + yield from self.traverse_by_type([node], [TSLDependencyGraph.PrimitiveNode], True, self_contained) + + def get_dependent_primitives(self, node: NodeType, self_contained: bool = False) -> Generator[TSLDependencyGraph.PrimitiveNode, None, None]: + yield from self.traverse_by_type([node], [TSLDependencyGraph.PrimitiveNode], False, self_contained) + + def get_associated_class(self, node: NodeType) -> TSLDependencyGraph.PrimitiveClassNode: + if isinstance(node, self.PrimitiveClassNode): + return node + elif isinstance(node, TSLDependencyGraph.PrimitiveNode): + successor = self.first_successor_by_type(node, [self.PrimitiveClassNode]) + if successor is not None: + return successor + raise Exception(f"Primitive {node.name} has no associated class.") + elif isinstance(node, self.PrimitiveTestNode): + primitive_node = self.first_successor_by_type(node, [TSLDependencyGraph.PrimitiveNode]) + if primitive_node is not None: + successor = self.first_successor_by_type(primitive_node, [self.PrimitiveClassNode]) + if successor is not None: + return successor + raise Exception(f"Primitive {node.name} has no associated class.") + raise Exception(f"Test {node.name} has no associated primitive.") + + def get_required_classes(self, node: NodeType) -> Set[TSLDependencyGraph.PrimitiveClassNode]: + node_set: Set[TSLDependencyGraph.PrimitiveClassNode] = set() + if not isinstance(node, TSLDependencyGraph.PrimitiveClassNode): + node_set.add(self.get_associated_class(node)) + for child_node in self.traverse_by_type([node], [TSLDependencyGraph.PrimitiveNode, TSLDependencyGraph.PrimitiveTestNode], True, False): + node_set.add(self.get_associated_class(child_node)) + return node_set + + def get_dependent_classes(self, node: NodeType) -> Set[TSLDependencyGraph.PrimitiveClassNode]: + node_set: Set[TSLDependencyGraph.PrimitiveClassNode] = set() + if not isinstance(node, TSLDependencyGraph.PrimitiveClassNode): + node_set.add(self.get_associated_class(node)) + for child_node in self.traverse_by_type([node], [TSLDependencyGraph.PrimitiveNode, TSLDependencyGraph.PrimitiveTestNode], False, False): + node_set.add(self.get_associated_class(child_node)) + + def sorted_classes_as_string(self) -> Generator[str, None, None]: + class_graph = nx.DiGraph() + for cls in self.class_nodes(): + class_graph.add_node(cls) + for required_class in self.get_required_classes(cls): + if required_class != cls: + class_graph.add_edge(required_class, cls) + try: + ordered_class_graph = nx.topological_sort(class_graph) + except nx.NetworkXUnfeasible: + print("Unable to sort class graph.") + exit(1) + for cls in ordered_class_graph: + yield cls.name + + def get_required_tests(self, node: NodeType) -> Set[TSLDependencyGraph.PrimitiveTestNode]: + node_set: Set[TSLDependencyGraph.PrimitiveTestNode] = set() + for child_node in self.traverse_by_type([node], [TSLDependencyGraph.PrimitiveTestNode], True, False): + node_set.add(child_node) + return node_set + + def get_dependent_tests(self, node: NodeType) -> Set[TSLDependencyGraph.PrimitiveTestNode]: + node_set: Set[TSLDependencyGraph.PrimitiveTestNode] = set() + for successor_node in self.traverse_by_type([node], [TSLDependencyGraph.PrimitiveTestNode], False, False): + node_set.add(successor_node) + return node_set + + def tested_primitive_count(self) -> int: + nodes = self.nodes_by_type([TSLDependencyGraph.PrimitiveNode]) + result = 0 + for primitive_node in nodes: + if len([test_case for test_case in self.predecessors_by_type(primitive_node, [TSLDependencyGraph.PrimitiveTestNode])]) > 0: + result += 1 + return result + + def missing_tests(self) -> Generator[TSLDependencyGraph.PrimitiveNode, None, None]: + nodes = self.nodes_by_type([TSLDependencyGraph.PrimitiveNode]) + for primitive_node in nodes: + if self.first_predecessor_by_type(primitive_node, [TSLDependencyGraph.PrimitiveTestNode]) is None: + yield primitive_node + + def unsafe_tests_as_str(self) -> Generator[str, None, None]: + def traverse_dfs(node: TSLDependencyGraph.NodeType, output_str: str) -> Generator[str, None, None]: + if self.first_successor_by_type(node, [TSLDependencyGraph.PrimitiveTestNode, TSLDependencyGraph.PrimitiveNode]) is None: + yield output_str + else: + if isinstance(node, TSLDependencyGraph.PrimitiveNode): + for successor_node in self.successors_by_type(node, [TSLDependencyGraph.PrimitiveTestNode]): + yield from traverse_dfs(successor_node, f"{output_str}") + elif isinstance(node, TSLDependencyGraph.PrimitiveTestNode): + test = node.name.split("::") + for successor_node in self.successors_by_type(node, [TSLDependencyGraph.PrimitiveNode]): + if len(output_str) == 0: + yield from traverse_dfs(successor_node, f"{test[0]}::<{test[1]}>") + else: + yield from traverse_dfs(successor_node, f"{test[0]}::<{test[1]}> -> {output_str}") + + for unsafe_primitive in self.missing_tests(): + for message in traverse_dfs(unsafe_primitive, f"{unsafe_primitive.name}::"): + if message != f"{unsafe_primitive.name}::": + yield f"{message}" + + + def as_str(self, include_tests: bool = False) -> str: + class_count = sum(1 for _ in self.nodes_by_type([TSLDependencyGraph.PrimitiveClassNode])) + primitives_count = sum(1 for _ in self.nodes_by_type([TSLDependencyGraph.PrimitiveNode])) + + missing_tests = [primitive.name for primitive in self.missing_tests()] + unsafe_test = [message for message in self.unsafe_tests_as_str()] + unsafe_set = {test_chain.split(" ")[0] for test_chain in unsafe_test} + longest_primitive_name = max(len(max(missing_tests, key=len)), len(max(unsafe_test, key=len))) + output_missing_tests = "\n".join([' ' + primitive for primitive in missing_tests]) + output_unsafe_tests = "\n".join([' ' + message for message in unsafe_test]) + result = f"""TSL - Summary: + - # Primitive Classes: {class_count} + - # Primitives: {primitives_count}""" + if include_tests: + tests_count = sum(1 for _ in self.nodes_by_type([TSLDependencyGraph.PrimitiveTestNode])) + tested_primitive_count = self.tested_primitive_count() + test_coverage = tested_primitive_count / primitives_count + average_tests_per_primitive = (tests_count / primitives_count) + result = f"""{result} + - # Tests: {tests_count} + - Primitives w/ Tests: {tested_primitive_count} + - Primitives w/o Tests: +{output_missing_tests} + {'='*longest_primitive_name} + {primitives_count - tested_primitive_count} + - Unsafe Tests: +{output_unsafe_tests} + {'='*longest_primitive_name} + {len(unsafe_set)} ({len(unsafe_test)} specific missing dependencies) + - Test Coverage: {test_coverage * 100:.2f}% + - Avg. Tests/Primitive: {average_tests_per_primitive:.2f}""" + return result + + def draw(self, out_name: str = "dependency_graph"): + from networkx.drawing.nx_agraph import to_agraph + g = to_agraph(self.__dependency_graph) + # pos = nx.nx_agraph.graphviz_layout(self.__graph) + g.layout() + config.generation_out_path.joinpath(out_name).with_suffix(".png").parent.mkdir(parents=True, exist_ok=True) + g.draw(config.generation_out_path.joinpath(out_name).with_suffix(".png"), prog='dot') + + def to_pandas(self) -> Tuple[pd.DataFrame, pd.DataFrame]: + edge_list = [] + node_list = [] + # Iterate over edges and nodes in the dependency graph + for source, target in self.__dependency_graph.edges(): + source_node = source + target_node = target + edge_data = { + 'from': source_node.id(), + 'to': target_node.id(), + 'weight': 1, + 'strength': "medium", + **self.__dependency_graph.get_edge_data(source, target) + } + edge_list.append(edge_data) + for node in self.__dependency_graph.nodes(): + node_data = { + 'id': node.id(), + **node.attributes() + } + node_list.append(node_data) + edge_df = pd.DataFrame(edge_list) + node_df = pd.DataFrame(node_list) + return edge_df, node_df + + def to_json(self, out_name: str = "dependency_graph") -> None: + edge_df, node_df = self.to_pandas() + edge_df.to_json(config.generation_out_path.joinpath(out_name).with_suffix(".edges.json")) + node_df.to_json(config.generation_out_path.joinpath(out_name).with_suffix(".nodes.json")) + + def to_jaal(self) -> None: + from jaal import Jaal + edge_df, node_df = self.to_pandas() + port=8050 + while True: + try: + Jaal(edge_df, node_df).plot(directed=True,port=port) + except: + port+=1 + diff --git a/generator/core/ctrl/tsl_dependendies.py b/generator/core/ctrl/tsl_dependendies.py deleted file mode 100644 index 21de4c92..00000000 --- a/generator/core/ctrl/tsl_dependendies.py +++ /dev/null @@ -1,357 +0,0 @@ -from __future__ import annotations -from generator.core.tsl_config import config -from generator.core.ctrl.tsl_lib import TSLLib - -from typing import Generator, Dict, Iterator, Set, List, Tuple -import re -import networkx as nx -from dataclasses import dataclass -import pandas as pd - -class TSLPrimitiveRegex: - def __init__(self): - self.tsl_primitive_regex_parts = { - "namespace": rf'\s+({config.lib_namespace}::)?', - "functor_namespace": rf'\s+({config.lib_namespace}::)?({config.implementation_namespace}::)?', - "template": r'<[^;]*>', - "parameters": r'\(([^)]*)\)?', - } - - def primitive_name_to_regex(self, primitive_name: str) -> str: - return rf'({self.tsl_primitive_regex_parts["namespace"]}({primitive_name})\s*{self.tsl_primitive_regex_parts["template"]}\s*{self.tsl_primitive_regex_parts["parameters"]})' - - def functor_name_to_regex(self, primitive_name: str) -> str: - return rf'({self.tsl_primitive_regex_parts["functor_namespace"]}({primitive_name})\s*{self.tsl_primitive_regex_parts["template"]}\s*{self.tsl_primitive_regex_parts["parameters"]})' - - def primitives_to_regex(self, primitives: Generator[Tuple[str, str], None, None]) -> str: - primitives_regex_list = [] - for primitive_name, functor_name in primitives: - primitives_regex_list.append(self.primitive_name_to_regex(primitive_name)) - if functor_name is not None and functor_name != "" and functor_name != primitive_name: - primitives_regex_list.append(self.functor_name_to_regex(functor_name)) - for x in primitives_regex_list: - print(x) - return rf'({"|".join(primitives_regex_list)})' - - def get_primitive_name_from_match(self, match, group_count: int) -> str: - found_count = 0 - for i in range(group_count, 0, -1): - if match.group(i) is not None: - found_count += 1 - if found_count == 2: - return match.group(i) - -class TSLDependencyGraphDeprecated: - def __init__(self, tsl_lib: TSLLib): - self.__tsl_lib = tsl_lib - self.__primitive_regex_helper = TSLPrimitiveRegex() - self.__primitive_regex = re.compile(self.__primitive_regex_helper.primitives_to_regex(tsl_lib.known_primitives_names_and_functor)) - print(f"REGEX: {self.__primitive_regex_helper.primitives_to_regex(tsl_lib.known_primitives_names_and_functor)}") - self.__primitive_to_class_dict: Dict[str, str] = {} - for class_name, primitive in tsl_lib.known_primitives: - self.__primitive_to_class_dict[primitive.declaration.name] = class_name - if primitive.declaration.functor_name is not None and primitive.declaration.functor_name != "" and primitive.declaration.functor_name != primitive.declaration.name: - self.__primitive_to_class_dict[primitive.declaration.functor_name] = class_name - - - def get_dependencies(self, implementation_str: str) -> List[str]: - dependencies: Set[str] = set() - for match in self.__primitive_regex.finditer(implementation_str): - required_primitive = self.__primitive_regex_helper.get_primitive_name_from_match(match, self.__primitive_regex.groups) - required_class = self.__primitive_to_class_dict[required_primitive] - dependencies.add(required_class) - return list(dependencies) - - def sort_tsl_classes(self, primitive_generator_implementation_fun_name: str): - class_set: Set[str] = {c.name for c in self.__tsl_lib.primitive_class_set} - dependency_graph: nx.DiGraph = nx.DiGraph() - for class_name, primitive in self.__tsl_lib.known_primitives: - fun = getattr(primitive, primitive_generator_implementation_fun_name) - for implementation_str in fun(False): - for match in self.__primitive_regex.finditer(implementation_str): - required_primitive = self.__primitive_regex_helper.get_primitive_name_from_match(match, self.__primitive_regex.groups) - print(f"primitive: {primitive.declaration.name} - required_primitive: {required_primitive}") - - required_class = self.__primitive_to_class_dict[required_primitive] - if (required_class != class_name) and (required_primitive != primitive.declaration.name): - dependency_graph.add_nodes_from([class_name, required_class]) - dependency_graph.add_edge(required_class, class_name) - class_set.discard(class_name) - class_set.discard(required_class) - print(f"{class_name}::{primitive.declaration.name} requires {required_class}::{required_primitive}") - try: - order = nx.topological_sort(dependency_graph) - except nx.NetworkXUnfeasible: - print("Cyclic dependency detected. Please fix this first.") - exit(1) - for class_name in class_set: - yield class_name - for primitive_class in order: - yield primitive_class - - -class TSLDependencyGraph: - @dataclass(order=True, unsafe_hash=True, frozen=True) - class PrimitiveClassNode: - name: str - type: str = "class" - size: int = 10 - def __str__(self): - return f"{self.name}" - def __repr__(self): - return str(self) - def id(self): - return self.name - def attributes(self): - return {"name": self.name, "type": self.type, "size": self.size} - @dataclass(order=True, unsafe_hash=True, frozen=True) - class PrimitiveNode: - name: str - type: str = "primitive" - size: int = 5 - def __str__(self): - return f"{self.name}" - def __repr__(self): - return str(self) - def id(self): - return self.name - def attributes(self): - return {"name": self.name, "type": self.type, "size": self.size} - @dataclass(order=True, unsafe_hash=True, frozen=True) - class PrimitiveTestNode: - name: str - type: str = "test" - size: int = 3 - def __str__(self): - return f"{self.name}" - def __repr__(self): - return str(self) - def id(self): - return self.name - def attributes(self): - return {"name": self.name, "type": self.type, "size": self.size} - - @property - def graph(self) -> nx.DiGraph: - return self.__dependency_graph - - def __init__(self, tsl_lib: TSLLib) -> None: - self.__tsl_lib = tsl_lib - self.__dependency_graph: nx.DiGraph = nx.DiGraph() - for primitive_class in self.__tsl_lib.primitive_class_set: - self.__dependency_graph.add_node(self.PrimitiveClassNode(primitive_class.name)) - for class_name, primitive in self.__tsl_lib.known_primitives: - self.__dependency_graph.add_node(self.PrimitiveNode(primitive.declaration.functor_name)) - self.__dependency_graph.add_edge(self.PrimitiveNode(primitive.declaration.functor_name), self.PrimitiveClassNode(class_name), label="part of") - - self.__primitive_regex_str: str = rf'(? None: - for _, primitive in self.__tsl_lib.known_primitives: - for test_name, implementation_str in primitive.get_tests_implementations(False): - fq_test_name = f"{primitive.declaration.functor_name}::{test_name}" - self.__dependency_graph.add_node(self.PrimitiveTestNode(fq_test_name)) - self.__dependency_graph.add_edge(self.PrimitiveTestNode(fq_test_name), self.PrimitiveNode(primitive.declaration.functor_name), label="test of") - for match in self.__primitive_regex.finditer(implementation_str): - required_primitive = match.group(2) - if required_primitive != primitive.declaration.functor_name: - self.__dependency_graph.add_edge(self.PrimitiveNode(required_primitive), self.PrimitiveTestNode(fq_test_name), label="depends on") - - def __has_primitive(self, primitive_name: str) -> bool: - return self.PrimitiveNode(primitive_name) in self.__dependency_graph - - def __traverse_from_primitives(self, primitives_names: List[str], node_types_of_interest: list, reversed: bool, self_contained: bool = False) -> Set[str]: - unknown_primitives = list(filter(lambda primitive_name: not self.__has_primitive(primitive_name), primitives_names)) - if len(unknown_primitives) > 0: - raise Exception(f"Primitives {', '.join(map(str, unknown_primitives))} not found in dependency graph.") - else: - self_set = set(primitives_names) if self_contained else set() - #edge[0]: source, edge[1]: target - return {*(primitive_name for current_primitive_name in primitives_names for primitive_name in - map( - lambda edge: edge[1].name, - filter( - lambda edge: any(isinstance(edge[1], node_type) for node_type in node_types_of_interest), - nx.bfs_edges(self.__dependency_graph, self.PrimitiveNode(current_primitive_name), reverse=reversed))) - ), *self_set} - - def is_acyclic(self) -> bool: - return nx.is_directed_acyclic_graph(self.__dependency_graph) - - def get_cycles_as_str(self) -> List[str]: - return list(map(lambda list_of_nodes: " -> ".join(map(lambda node: node.name, list_of_nodes)), nx.simple_cycles(self.__dependency_graph))) - - def get_direct_predecessor_names(self, node, node_types_of_interest: list) -> List[str]: - return list(map(lambda edge: edge[0].name, filter(lambda edge: any(isinstance(edge[0], node_type) for node_type in node_types_of_interest), self.__dependency_graph.in_edges(node)))) - - def get_primitive_nodes(self) -> List[TSLDependencyGraph.PrimitiveNode]: - return list(filter(lambda node: isinstance(node, self.PrimitiveNode), self.__dependency_graph.nodes)) - - def get_primitives_count(self) -> int: - return sum(1 for _ in filter(lambda node: isinstance(node, self.PrimitiveNode), self.__dependency_graph.nodes)) - - def get_class_nodes(self) -> List[TSLDependencyGraph.PrimitiveClassNode]: - return list(filter(lambda node: isinstance(node, self.PrimitiveClassNode), self.__dependency_graph.nodes)) - - def class_nodes(self) -> Iterator[TSLDependencyGraph.PrimitiveClassNode]: - return filter(lambda node: isinstance(node, self.PrimitiveClassNode), self.__dependency_graph.nodes) - - def get_classes_count(self) -> int: - return sum(1 for _ in filter(lambda node: isinstance(node, self.PrimitiveClassNode), self.__dependency_graph.nodes)) - - def get_test_nodes(self) -> List[TSLDependencyGraph.PrimitiveTestNode]: - return list(filter(lambda node: isinstance(node, self.PrimitiveTestNode), self.__dependency_graph.nodes)) - - def get_tests_count(self) -> int: - return sum(1 for _ in filter(lambda node: isinstance(node, self.PrimitiveTestNode), self.__dependency_graph.nodes)) - - def get_required_primitives(self, primitive_names: str|List[str], self_contained: bool = False) -> List[str]: - if isinstance(primitive_names, str): - primitive_names = primitive_names.split(" ") - return list(self.__traverse_from_primitives(primitive_names, [self.PrimitiveNode], True, self_contained)) - - def get_dependent_primitives(self, primitive_names: str|List[str], self_contained: bool = False) -> List[str]: - if isinstance(primitive_names, str): - primitive_names = primitive_names.split(" ") - return list(self.__traverse_from_primitives(primitive_names, [self.PrimitiveNode], False, self_contained)) - - def get_associated_class(self, primitive_name) -> str: - if not self.__has_primitive(primitive_name): - raise Exception(f"Primitive {primitive_name} not found in dependency graph.") - else: - for successor in self.__dependency_graph.successors(self.PrimitiveNode(primitive_name)): - if isinstance(successor, self.PrimitiveClassNode): - return successor.name - raise Exception(f"Primitive {primitive_name} has no associated class.") - - def get_required_classes(self, primitive_names: str|List[str]) -> List[str]: - required_primitives = self.get_required_primitives(primitive_names, True) - return list({ - self.get_associated_class(required_primitive) for required_primitive in required_primitives - }) - - def get_dependent_classes(self, primitive_names: str|List[str]) -> List[str]: - dependent_primitives = self.get_dependent_primitives(primitive_names, True) - return list({ - self.get_associated_class(dependent_primitive) for dependent_primitive in dependent_primitives - }) - - def sorted_classes(self) -> Generator[str, None, None]: - class_graph = nx.DiGraph() - for cls in self.class_nodes(): - class_graph.add_node(cls.name) - for predecessor in self.get_required_classes(self.get_direct_predecessor_names(cls, [self.PrimitiveNode])): - if predecessor != cls.name: - if False: - if predecessor == "calc" and cls.name == "convert": - for x in self.get_direct_predecessor_names(cls, [self.PrimitiveNode]): - for requc in self.get_required_classes([x]): - print(f"{x} ---> {requc}") - class_graph.add_edge(predecessor, cls.name) - try: - ordered_class_graph = nx.topological_sort(class_graph) - except nx.NetworkXUnfeasible: - print("Unable to sort class graph.") - exit(1) - for cls in ordered_class_graph: - yield cls - - def get_required_tests(self, primitive_names: str|List[str]) -> List[str]: - if isinstance(primitive_names, str): - primitive_names = primitive_names.split(" ") - return list(self.__traverse_from_primitives(primitive_names, [self.PrimitiveTestNode], True)) - - def get_dependent_tests(self, primitive_names: str|List[str]) -> List[str]: - if isinstance(primitive_names, str): - primitive_names = primitive_names.split(" ") - return list(self.__traverse_from_primitives(primitive_names, [self.PrimitiveTestNode], False)) - - def tested_primitive_count(self) -> int: - nodes = self.get_primitive_nodes() - result = 0 - for primitive_node in nodes: - test_cases = self.get_direct_predecessor_names(primitive_node, [self.PrimitiveTestNode]) - if len(test_cases) > 0: - result += 1 - return result - - def missing_tests(self) -> Generator[str, None, None]: - nodes = self.get_primitive_nodes() - for primitive_node in nodes: - test_cases = self.get_direct_predecessor_names(primitive_node, [self.PrimitiveTestNode]) - if len(test_cases) == 0: - yield primitive_node.name - - def as_str(self, include_tests: bool = False) -> str: - class_count = self.get_classes_count() - primitives_count = self.get_primitives_count() - result = f"""TSL - Summary: - - # Primitive Classes: {class_count} - - # Primitives: {primitives_count}""" - if include_tests: - tests_count = self.get_tests_count() - tested_primitive_count = self.tested_primitive_count() - test_coverage = tested_primitive_count / primitives_count - average_tests_per_primitive = (tests_count / primitives_count) - result = f"""{result} - - # Tests: {tests_count} - - Primitives w/ Tests: {tested_primitive_count} - - Primitives w/o Tests: {primitives_count - tested_primitive_count} - - Test Coverage: {test_coverage * 100:.2f}% - - Avg. Tests/Primitive: {average_tests_per_primitive:.2f}""" - return result - - def draw(self, out_name: str = "dependency_graph"): - from networkx.drawing.nx_agraph import to_agraph - g = to_agraph(self.__dependency_graph) - # pos = nx.nx_agraph.graphviz_layout(self.__graph) - g.layout() - config.generation_out_path.joinpath(out_name).with_suffix(".png").parent.mkdir(parents=True, exist_ok=True) - g.draw(config.generation_out_path.joinpath(out_name).with_suffix(".png"), prog='dot') - - def to_pandas(self) -> Tuple[pd.DataFrame, pd.DataFrame]: - edge_list = [] - node_list = [] - # Iterate over edges and nodes in the dependency graph - for source, target in self.__dependency_graph.edges(): - source_node = source - target_node = target - edge_data = { - 'from': source_node.id(), - 'to': target_node.id(), - 'weight': 1, - 'strength': "medium", - **self.__dependency_graph.get_edge_data(source, target) - } - edge_list.append(edge_data) - for node in self.__dependency_graph.nodes(): - node_data = { - 'id': node.id(), - **node.attributes() - } - node_list.append(node_data) - edge_df = pd.DataFrame(edge_list) - node_df = pd.DataFrame(node_list) - return edge_df, node_df - - def to_json(self, out_name: str = "dependency_graph") -> None: - edge_df, node_df = self.to_pandas() - edge_df.to_json(config.generation_out_path.joinpath(out_name).with_suffix(".edges.json")) - node_df.to_json(config.generation_out_path.joinpath(out_name).with_suffix(".nodes.json")) - - def to_jaal(self) -> None: - from jaal import Jaal - edge_df, node_df = self.to_pandas() - port=8050 - while True: - try: - Jaal(edge_df, node_df).plot(directed=True,port=port) - except: - port+=1 diff --git a/generator/core/ctrl/tsl_libfile_generator.py b/generator/core/ctrl/tsl_libfile_generator.py index b79e36d7..b8f7c8e2 100644 --- a/generator/core/ctrl/tsl_libfile_generator.py +++ b/generator/core/ctrl/tsl_libfile_generator.py @@ -15,7 +15,7 @@ from generator.utils.file_utils import strip_common_path_prefix from generator.utils.log_utils import LogInit from generator.utils.yaml_utils import yaml_load, YamlDataType -from generator.core.ctrl.tsl_dependendies import TSLDependencyGraph +from generator.core.ctrl.tsl_dependencies import TSLDependencyGraph @@ -161,7 +161,7 @@ def __sort_header_files(self, sorted_keys: List[str], header_files: List[TSLHead return result @LogInit() - def __init__(self, lib: TSLLib) -> None: + def __init__(self, lib: TSLLib, dependency_graph: TSLDependencyGraph) -> None: self.__static_files: List[TSLHeaderFile] = [] self.__extension_name_to_file_dict: Dict[str, TSLHeaderFile] = {} self.__primitive_class_declarations: List[TSLHeaderFile] = [] @@ -175,14 +175,18 @@ def __init__(self, lib: TSLLib) -> None: # print("Checking implementation dependencies:") # ordered_primitive_classes = list(dep_graph.sort_tsl_classes("get_implementations")) + sorted_classes_prefix: List[str] = [p for p in dependency_graph.sorted_classes_as_string()] + print("Sorting includes according to the following order: " + ", ".join(sorted_classes_prefix)) + include_order_dict = {prefix: index for index, prefix in enumerate(sorted_classes_prefix)} + include_sort_fun = lambda x: [include_order_dict[ref] for ref in sorted_classes_prefix if x.file_name.stem.startswith(ref)] generated_files_root: TSLHeaderFile = TSLHeaderFile.create_from_dict(config.lib_generated_files_root_header, {}) for extension_file in self.extension_files: generated_files_root.add_file_include(extension_file) # for primitive_declaration in self.__sort_header_files(ordered_primitive_classes, list(self.primitive_declaration_files)): - for primitive_declaration in self.primitive_declaration_files: + for primitive_declaration in sorted(self.primitive_declaration_files, key=include_sort_fun): generated_files_root.add_file_include(primitive_declaration) - for primitive_definition in self.primitive_definition_files: + for primitive_definition in sorted(self.primitive_definition_files, key=include_sort_fun): generated_files_root.add_file_include(primitive_definition) for runtime_header in lib.relevant_runtime_headers: generated_files_root.add_predefined_tsl_file_include(f'"{runtime_header.name}"') diff --git a/generator/core/tsl_generator.py b/generator/core/tsl_generator.py index 45af16bb..b902362d 100644 --- a/generator/core/tsl_generator.py +++ b/generator/core/tsl_generator.py @@ -10,7 +10,7 @@ from generator.core.model.tsl_extension import TSLExtensionSet from generator.core.model.tsl_primitive import TSLPrimitiveClass, TSLPrimitiveClassSet from generator.core.tsl_config import config -from generator.core.ctrl.tsl_dependendies import TSLDependencyGraph +from generator.core.ctrl.tsl_dependencies import TSLDependencyGraph from generator.expansions.tsl_readme_md import create_readme from generator.expansions.tsl_translation_unit import TSLTranslationUnitContainer from generator.expansions.tsl_unit_test import TSLTestGenerator @@ -189,26 +189,8 @@ def generate(self, relevant_hardware_flags: List[str] = None, relevant_primitive lib: TSLLib = TSLLib(relevant_extensions_set, relevant_primitives_class_set) dep_graph = TSLDependencyGraph(lib) - if not dep_graph.is_acyclic(): - self.log(logging.ERROR, "Dependency graph for primitive definitions is not acyclic. Please check your dependencies.") - for cycle in dep_graph.get_cycles_as_str(): - self.log(logging.ERROR, f"Cycle: {cycle}") - return - for x in dep_graph.sorted_classes(): - print(f"#include {x}") - print("DONE") - dep_graph.inspect_tests() - if not dep_graph.is_acyclic(): - self.log(logging.ERROR, "Dependency graph for primitive definitions is not acyclic. Please check your dependencies.") - for cycle in dep_graph.get_cycles_as_str(): - self.log(logging.ERROR, f"Cycle: {cycle}") - return - print(dep_graph.as_str(True)) - for missing_test in dep_graph.missing_tests(): - self.log(logging.WARNING, f"Missing test for primitive {missing_test}") - - - file_generator: TSLFileGenerator = TSLFileGenerator(lib) + + file_generator: TSLFileGenerator = TSLFileGenerator(lib, dep_graph) if not config.print_output_only: for path in file_generator.out_pathes: self.log(logging.INFO, f"Creating directory {path}") @@ -222,7 +204,8 @@ def generate(self, relevant_hardware_flags: List[str] = None, relevant_primitive cmake_config = config.get_expansion_config("cmake") tsl_translation_units: TSLTranslationUnitContainer = TSLTranslationUnitContainer() - for path, tu in TSLTestGenerator.generate(lib): + + for path, tu in TSLTestGenerator.generate(lib, dep_graph): tsl_translation_units.add_tu(path, tu) if cmake_config["enabled"]: diff --git a/generator/expansions/tsl_unit_test.py b/generator/expansions/tsl_unit_test.py index b7004458..b67b8a37 100644 --- a/generator/expansions/tsl_unit_test.py +++ b/generator/expansions/tsl_unit_test.py @@ -19,6 +19,7 @@ from generator.utils.file_utils import strip_common_path_prefix from generator.utils.log_utils import LogInit from generator.utils.yaml_utils import YamlDataType, yaml_load +from generator.core.ctrl.tsl_dependencies import TSLDependencyGraph import os @@ -483,10 +484,22 @@ def __init__(self): pass @staticmethod - def generate(lib: TSLLib) -> Generator[Tuple[Path,TSLTranslationUnit], None, None]: + def generate(lib: TSLLib, dep_graph: TSLDependencyGraph) -> Generator[Tuple[Path,TSLTranslationUnit], None, None]: if not config.expansion_enabled("unit_tests"): + if not dep_graph.is_acyclic(): + self.log(logging.ERROR, "Dependency graph for primitive definitions is not acyclic. Please check your dependencies.") + for cycle in dep_graph.get_cycles_as_str(): + self.log(logging.ERROR, f"Cycle: {cycle}") return + dep_graph.inspect_tests() + if not dep_graph.is_acyclic(): + self.log(logging.ERROR, "Dependency graph for primitive definitions is not acyclic. Please check your dependencies.") + for cycle in dep_graph.get_cycles_as_str(): + self.log(logging.ERROR, f"Cycle: {cycle}") + return + print(dep_graph.as_str(True)) + unit_test_config: dict = config.get_expansion_config("unit_tests") tsltu: TSLTranslationUnit = TSLTranslationUnit(target_name="tsl_test") @@ -595,3 +608,6 @@ def generate(lib: TSLLib) -> Generator[Tuple[Path,TSLTranslationUnit], None, Non tsltu.add_source(tsf) yield root_path, tsltu + + + From 012821a83b805985d90d6e83fe5ca3a7e5858c76 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Thu, 21 Sep 2023 13:15:38 +0200 Subject: [PATCH 41/47] Fixed sfinae --- .../core/primitive_declaration.template | 2 +- generator/core/tsl_generator.py | 15 +++++- generator/expansions/tsl_unit_test.py | 10 +--- .../static_files/core/utils/preprocessor.yaml | 47 ++++++++++++++----- .../expansions/tests/test_functions.yaml | 2 +- primitive_data/primitives/convert.yaml | 4 +- primitive_data/primitives/ls.yaml | 16 +++---- 7 files changed, 60 insertions(+), 36 deletions(-) diff --git a/generator/config/generator/tsl_templates/core/primitive_declaration.template b/generator/config/generator/tsl_templates/core/primitive_declaration.template index 5552ec4d..74d69f73 100644 --- a/generator/config/generator/tsl_templates/core/primitive_declaration.template +++ b/generator/config/generator/tsl_templates/core/primitive_declaration.template @@ -92,7 +92,7 @@ template< {{ returns['ctype'] }} {% else %} #ifdef {{ tsl_namespace|upper ~ '_' ~ tsl_implementation_namespace|upper ~ '_' ~ primitive_name|upper ~ '_STRUCT_DEFINED'}} - !tsl_primitive_ambiguous<{{ tsl_implementation_namespace }}::{{ functor_name }}<{{ vector_name }}, {{ ns.additional_template_name }}{{ idof_name }}>, {{ tsl_implementation_namespace }}::{{ functor_name }}<{{ vector_name }}, {{ ns.additional_template_name }}{{ idof_name }}>>(), + !tsl_primitive_ambiguous<{{ tsl_implementation_namespace }}::{{ primitive_name }}<{{ vector_name }}, {{ ns.additional_template_name }}{{ idof_name }}>, {{ tsl_implementation_namespace }}::{{ functor_name }}<{{ vector_name }}, {{ ns.additional_template_name }}{{ idof_name }}>>(), #else true, #endif diff --git a/generator/core/tsl_generator.py b/generator/core/tsl_generator.py index b902362d..6caea5bc 100644 --- a/generator/core/tsl_generator.py +++ b/generator/core/tsl_generator.py @@ -189,6 +189,11 @@ def generate(self, relevant_hardware_flags: List[str] = None, relevant_primitive lib: TSLLib = TSLLib(relevant_extensions_set, relevant_primitives_class_set) dep_graph = TSLDependencyGraph(lib) + if not dep_graph.is_acyclic(): + self.log(logging.ERROR, "Dependency graph for primitive definitions is not acyclic. Please check your dependencies.") + for cycle in dep_graph.get_cycles_as_str(): + self.log(logging.ERROR, f"Cycle: {cycle}") + return file_generator: TSLFileGenerator = TSLFileGenerator(lib, dep_graph) if not config.print_output_only: @@ -205,8 +210,14 @@ def generate(self, relevant_hardware_flags: List[str] = None, relevant_primitive tsl_translation_units: TSLTranslationUnitContainer = TSLTranslationUnitContainer() - for path, tu in TSLTestGenerator.generate(lib, dep_graph): - tsl_translation_units.add_tu(path, tu) + try: + for path, tu in TSLTestGenerator.generate(lib, dep_graph): + tsl_translation_units.add_tu(path, tu) + except Exception as e: + self.log(logging.ERROR, f"Error while generating test files. Exception: {str(e)}") + for cycle in dep_graph.get_cycles_as_str(): + self.log(logging.ERROR, f"Cycle: {cycle}") + raise e if cmake_config["enabled"]: TSLCMakeGenerator.generate_lib(lib, file_generator, tsl_translation_units, cmake_config) diff --git a/generator/expansions/tsl_unit_test.py b/generator/expansions/tsl_unit_test.py index b67b8a37..1977d129 100644 --- a/generator/expansions/tsl_unit_test.py +++ b/generator/expansions/tsl_unit_test.py @@ -486,18 +486,10 @@ def __init__(self): @staticmethod def generate(lib: TSLLib, dep_graph: TSLDependencyGraph) -> Generator[Tuple[Path,TSLTranslationUnit], None, None]: if not config.expansion_enabled("unit_tests"): - if not dep_graph.is_acyclic(): - self.log(logging.ERROR, "Dependency graph for primitive definitions is not acyclic. Please check your dependencies.") - for cycle in dep_graph.get_cycles_as_str(): - self.log(logging.ERROR, f"Cycle: {cycle}") return dep_graph.inspect_tests() - if not dep_graph.is_acyclic(): - self.log(logging.ERROR, "Dependency graph for primitive definitions is not acyclic. Please check your dependencies.") - for cycle in dep_graph.get_cycles_as_str(): - self.log(logging.ERROR, f"Cycle: {cycle}") - return + print(dep_graph.as_str(True)) unit_test_config: dict = config.get_expansion_config("unit_tests") diff --git a/generator/static_files/core/utils/preprocessor.yaml b/generator/static_files/core/utils/preprocessor.yaml index 5da07c05..f913d8c9 100755 --- a/generator/static_files/core/utils/preprocessor.yaml +++ b/generator/static_files/core/utils/preprocessor.yaml @@ -19,31 +19,52 @@ implementations: # define TSL_CVAL(type, value) std::integral_constant{} #endif - | - template - struct class_declared : std::false_type {}; - template - struct class_declared : std::true_type {}; + template struct class_declared : std::false_type {}; + template struct class_declared : std::true_type {}; - | template - struct tsl_functor_defined { + struct tsl_functor_template_defined { + // Define a helper function that checks if apply can be instantiated with specific arguments. + template + static auto has_valid_apply(Args&&...) -> decltype(std::declval().apply(std::declval()...), std::true_type{}); + // If the helper function is well-formed, it means apply can be instantiated with these arguments. + template + static std::true_type test(int, Args&&...); + // If the helper function is not well-formed, it means apply cannot be instantiated with these arguments. + static std::false_type test(...); + // Combine the results of the tests. + template + using type = decltype(test(0, std::declval()...)); + static constexpr bool value = type<>::value; + }; + - | + template + struct tsl_functor_plain_defined { template - static auto test(int) -> decltype(CurrentHelperStruct::apply, std::true_type{}); + static auto test(int) -> decltype(std::declval().apply, std::true_type{}); template static auto test(...) -> std::false_type; using type = decltype(test(0)); static constexpr bool value = type::value; }; + - | + template + struct tsl_functor_defined { + static constexpr bool value = + tsl_functor_template_defined::value || + tsl_functor_plain_defined::value; + }; - | template constexpr bool tsl_primitive_ambiguous() { - if constexpr ((class_declared::value)) { - if constexpr ((tsl_functor_defined::value) && (tsl_functor_defined::value)) { - return std::is_same_v; - } else { - return false; - } + if constexpr ((class_declared::value)) { + if constexpr ((tsl_functor_defined::value) && (tsl_functor_defined::value)) { + return std::is_same_v; } else { - return false; + return false; } + } else { + return false; + } } ... diff --git a/generator/static_files/expansions/tests/test_functions.yaml b/generator/static_files/expansions/tests/test_functions.yaml index e8ca78a0..dcafe603 100644 --- a/generator/static_files/expansions/tests/test_functions.yaml +++ b/generator/static_files/expansions/tests/test_functions.yaml @@ -142,7 +142,7 @@ implementations: private: template static auto call_set_impl(typename Vec::base_type const * data, std::index_sequence) { - return tsl::set(data[(Vec::vector_element_count()-1)-Is]...); + return tsl::set((typename Vec::base_type)data[(Vec::vector_element_count()-1)-Is]...); } public: static auto call_set(typename Vec::base_type const * data) { diff --git a/primitive_data/primitives/convert.yaml b/primitive_data/primitives/convert.yaml index 28a69147..5ddb0ad9 100644 --- a/primitive_data/primitives/convert.yaml +++ b/primitive_data/primitives/convert.yaml @@ -166,12 +166,12 @@ testing: - test_name: "default_int_float_conversion" requires: ["loadu", "storeu"] includes: [""] - implementation: + implementation: | using T = typename Vec::base_type; using T_int = typename ToType::base_type; if constexpr(std::is_same::value){ - std::size_t element_count = 1024 * 8 * 10000; + std::size_t element_count = 1024 * 8; testing::test_memory_helper_t test_helper{element_count, Vec::vector_element_count(), false}; testing::test_memory_helper_t test_helper_float{element_count, Vec::vector_element_count(), false}; diff --git a/primitive_data/primitives/ls.yaml b/primitive_data/primitives/ls.yaml index a99310db..b641343e 100644 --- a/primitive_data/primitives/ls.yaml +++ b/primitive_data/primitives/ls.yaml @@ -703,7 +703,7 @@ definitions: std::integer_sequence ) { return tsl::set( - (((T)(sizeof...(Is) - 1 - Is)))... + (T)(((T)(sizeof...(Is) - 1 - Is)))... ); }; return seq_fun(std::make_integer_sequence()); @@ -718,7 +718,7 @@ definitions: std::integer_sequence ) { return tsl::set( - (((T)(sizeof...(Is) - 1 - Is)))... + (T)(((T)(sizeof...(Is) - 1 - Is)))... ); }; return seq_fun(std::make_integer_sequence()); @@ -733,7 +733,7 @@ definitions: std::integer_sequence ) { return tsl::set( - (((T)(sizeof...(Is) - 1 - Is)))... + (T)(((T)(sizeof...(Is) - 1 - Is)))... ); }; return seq_fun(std::make_integer_sequence()); @@ -816,7 +816,7 @@ definitions: T const init, T const sw, std::integer_sequence ) { return tsl::set( - ((((T)(sizeof...(Is) - 1 - Is))*sw)+init)... + (T)((((T)(sizeof...(Is) - 1 - Is))*sw)+init)... ); }; return seq_fun(start, stepwidth, std::make_integer_sequence()); @@ -831,7 +831,7 @@ definitions: T const init, T const sw, std::integer_sequence ) { return tsl::set( - ((((T)(sizeof...(Is) - 1 - Is))*sw)+init)... + (T)((((T)(sizeof...(Is) - 1 - Is))*sw)+init)... ); }; return seq_fun(start, stepwidth, std::make_integer_sequence()); @@ -846,7 +846,7 @@ definitions: T const init, T const sw, std::integer_sequence ) { return tsl::set( - ((((T)(sizeof...(Is) - 1 - Is))*sw)+init)... + (T)((((T)(sizeof...(Is) - 1 - Is))*sw)+init)... ); }; return seq_fun(start, stepwidth, std::make_integer_sequence()); @@ -1482,7 +1482,7 @@ definitions: *memory = data_array[i]; memory += (mask>>i)&0b1; } - if(((mask>>Vec::vector_element_count())&0b1) == 0) { + if(((mask>>(Vec::vector_element_count()-1))&0b1) == 0) { *memory = safe[memory-orig_mem]; } #INTEL - AVX2 @@ -1501,7 +1501,7 @@ definitions: *memory = data_array[i]; memory += (mask>>i)&0b1; } - if(((mask>>Vec::vector_element_count())&0b1) == 0) { + if(((mask>>(Vec::vector_element_count()-1))&0b1) == 0) { *memory = safe[memory-orig_mem]; } - target_extension: "avx2" From 3ebb419edb448a18856e070f0621924f1423793a Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Thu, 21 Sep 2023 13:23:17 +0200 Subject: [PATCH 42/47] python3.8 compatibility Changed NodeType = PrimitiveClassNode| PrimitiveNode | PrimitiveTestNode to NodeType = Union[PrimitiveClassNode, PrimitiveNode, PrimitiveTestNode] --- generator/core/ctrl/tsl_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generator/core/ctrl/tsl_dependencies.py b/generator/core/ctrl/tsl_dependencies.py index bcb73aea..b82fda49 100644 --- a/generator/core/ctrl/tsl_dependencies.py +++ b/generator/core/ctrl/tsl_dependencies.py @@ -50,7 +50,7 @@ def id(self): def attributes(self): return {"name": self.name, "type": self.type, "size": self.size} - NodeType = PrimitiveClassNode | PrimitiveNode | PrimitiveTestNode + NodeType = Union[PrimitiveClassNode, PrimitiveNode, PrimitiveTestNode] @property def graph(self) -> nx.DiGraph: From 528d56e707564a83767f0cb6decbe765cd6aff02 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Thu, 21 Sep 2023 14:59:05 +0200 Subject: [PATCH 43/47] Added to_mask definition for avx (without avx2) --- generator/core/ctrl/tsl_dependencies.py | 3 ++- primitive_data/primitives/mask.yaml | 28 +++++++++++++++++++------ 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/generator/core/ctrl/tsl_dependencies.py b/generator/core/ctrl/tsl_dependencies.py index b82fda49..4a623322 100644 --- a/generator/core/ctrl/tsl_dependencies.py +++ b/generator/core/ctrl/tsl_dependencies.py @@ -72,7 +72,8 @@ def __init__(self, tsl_lib: TSLLib) -> None: for implementation_str in primitive.get_implementations(False): for match in self.__primitive_regex.finditer(implementation_str): required_primitive = match.group(2) - self.__dependency_graph.add_edge(TSLDependencyGraph.PrimitiveNode(required_primitive), TSLDependencyGraph.PrimitiveNode(primitive.declaration.functor_name), label="requires") + if required_primitive != primitive.declaration.functor_name: + self.__dependency_graph.add_edge(TSLDependencyGraph.PrimitiveNode(required_primitive), TSLDependencyGraph.PrimitiveNode(primitive.declaration.functor_name), label="requires") def inspect_tests(self) -> None: for _, primitive in self.__tsl_lib.known_primitives: diff --git a/primitive_data/primitives/mask.yaml b/primitive_data/primitives/mask.yaml index 05af8d90..7f9101cf 100644 --- a/primitive_data/primitives/mask.yaml +++ b/primitive_data/primitives/mask.yaml @@ -401,7 +401,7 @@ definitions: #INTEL - AVX2 - target_extension: "avx2" ctype: ["int8_t", "uint8_t"] - lscpu_flags: ['avx2'] + lscpu_flags: ['avx', 'avx2'] specialization_comment: "@todo: Verify!" implementation: | auto const permute_vec = _mm256_set_epi64x(0x303030303030303, 0x202020202020202, 0x101010101010101, 0x0); @@ -411,9 +411,25 @@ definitions: auto const and_shuffled_vec = _mm256_and_si256(shuffled_vec, and_vec); auto result = _mm256_cmpeq_epi8(and_shuffled_vec, and_vec); return result; + - target_extension: "avx2" + ctype: ["int8_t", "uint8_t", "int16_t", "uint16_t", "int32_t", "uint32_t", "int64_t", "uint64_t"] + lscpu_flags: ['sse2'] + implementation: | + using sse_type = simd; + auto tmp128_lo = to_mask::apply(mask); + auto tmp128_hi = to_mask::apply(mask >> sse_type::vector_element_count()); + return _mm256_set_m128i(tmp128_hi, tmp128_lo); + - target_extension: "avx2" + ctype: ["float", "double"] + lscpu_flags: ['sse2'] + implementation: | + using sse_type = simd; + auto tmp128_lo = to_mask::apply(mask); + auto tmp128_hi = to_mask::apply(mask >> sse_type::vector_element_count()); + return _mm256_castsi256_{{ intrin_tp[ctype][1] }}(_mm256_set_m128i(tmp128_hi, tmp128_lo)); - target_extension: "avx2" ctype: ["int16_t", "uint16_t"] - lscpu_flags: ['avx2'] + lscpu_flags: ['avx', 'avx2'] implementation: | auto const and_vec = _mm256_set_epi64x(0x8000400020001000, 0x800040002000100, 0x80004000200010, 0x8000400020001); auto const data_vec = _mm256_set1_epi16(mask); @@ -421,7 +437,7 @@ definitions: return _mm256_cmpeq_epi16(anded_vec, and_vec); - target_extension: "avx2" ctype: ["int32_t", "uint32_t"] - lscpu_flags: ['avx2'] + lscpu_flags: ['avx', 'avx2'] implementation: | auto const and_vec = _mm256_set_epi64x(0x8000000040, 0x2000000010, 0x800000004, 0x200000001); auto const data_vec = _mm256_set1_epi32(mask); @@ -429,7 +445,7 @@ definitions: return _mm256_cmpeq_epi32(anded_vec, and_vec); - target_extension: "avx2" ctype: ["float"] - lscpu_flags: ['avx2'] + lscpu_flags: ['avx', 'avx2'] implementation: | auto const and_vec = _mm256_set_epi64x(0x8000000040, 0x2000000010, 0x800000004, 0x200000001); auto const data_vec = _mm256_set1_epi32(mask); @@ -437,7 +453,7 @@ definitions: return _mm256_castsi256_ps(_mm256_cmpeq_epi32(anded_vec, and_vec)); - target_extension: "avx2" ctype: ["int64_t", "uint64_t"] - lscpu_flags: ['avx2'] + lscpu_flags: ['avx', 'avx2'] implementation: | auto const and_vec = _mm256_set_epi64x(0x8, 0x4, 0x2, 0x1); auto const data_vec = _mm256_set1_epi64x(mask); @@ -445,7 +461,7 @@ definitions: return _mm256_cmpeq_epi64(anded_vec, and_vec); - target_extension: "avx2" ctype: ["double"] - lscpu_flags: ['avx2'] + lscpu_flags: ['avx', 'avx2'] implementation: | auto const and_vec = _mm256_set_epi64x(0x8, 0x4, 0x2, 0x1); auto const data_vec = _mm256_set1_epi64x(mask); From c946cf972e244ff20510f491ffaf861cab422333 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Thu, 21 Sep 2023 15:14:22 +0200 Subject: [PATCH 44/47] Fixed to mask --- primitive_data/primitives/mask.yaml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/primitive_data/primitives/mask.yaml b/primitive_data/primitives/mask.yaml index 7f9101cf..5b06b554 100644 --- a/primitive_data/primitives/mask.yaml +++ b/primitive_data/primitives/mask.yaml @@ -401,8 +401,8 @@ definitions: #INTEL - AVX2 - target_extension: "avx2" ctype: ["int8_t", "uint8_t"] - lscpu_flags: ['avx', 'avx2'] - specialization_comment: "@todo: Verify!" + lscpu_flags: ['sse4_2', 'avx', 'avx2'] + specialization_comment: "We use sse4_2 here to trick the generator into thinking that this variant is 'more' specialized than the ('sse2','avx') variant." implementation: | auto const permute_vec = _mm256_set_epi64x(0x303030303030303, 0x202020202020202, 0x101010101010101, 0x0); auto const data_vec = _mm256_set1_epi32(mask); @@ -413,7 +413,7 @@ definitions: return result; - target_extension: "avx2" ctype: ["int8_t", "uint8_t", "int16_t", "uint16_t", "int32_t", "uint32_t", "int64_t", "uint64_t"] - lscpu_flags: ['sse2'] + lscpu_flags: ['sse2', 'avx'] implementation: | using sse_type = simd; auto tmp128_lo = to_mask::apply(mask); @@ -421,7 +421,7 @@ definitions: return _mm256_set_m128i(tmp128_hi, tmp128_lo); - target_extension: "avx2" ctype: ["float", "double"] - lscpu_flags: ['sse2'] + lscpu_flags: ['sse2', 'avx'] implementation: | using sse_type = simd; auto tmp128_lo = to_mask::apply(mask); @@ -429,7 +429,7 @@ definitions: return _mm256_castsi256_{{ intrin_tp[ctype][1] }}(_mm256_set_m128i(tmp128_hi, tmp128_lo)); - target_extension: "avx2" ctype: ["int16_t", "uint16_t"] - lscpu_flags: ['avx', 'avx2'] + lscpu_flags: ['sse4_2', 'avx', 'avx2'] implementation: | auto const and_vec = _mm256_set_epi64x(0x8000400020001000, 0x800040002000100, 0x80004000200010, 0x8000400020001); auto const data_vec = _mm256_set1_epi16(mask); @@ -437,7 +437,7 @@ definitions: return _mm256_cmpeq_epi16(anded_vec, and_vec); - target_extension: "avx2" ctype: ["int32_t", "uint32_t"] - lscpu_flags: ['avx', 'avx2'] + lscpu_flags: ['sse4_2', 'avx', 'avx2'] implementation: | auto const and_vec = _mm256_set_epi64x(0x8000000040, 0x2000000010, 0x800000004, 0x200000001); auto const data_vec = _mm256_set1_epi32(mask); @@ -445,7 +445,7 @@ definitions: return _mm256_cmpeq_epi32(anded_vec, and_vec); - target_extension: "avx2" ctype: ["float"] - lscpu_flags: ['avx', 'avx2'] + lscpu_flags: ['sse4_2', 'avx', 'avx2'] implementation: | auto const and_vec = _mm256_set_epi64x(0x8000000040, 0x2000000010, 0x800000004, 0x200000001); auto const data_vec = _mm256_set1_epi32(mask); @@ -453,7 +453,7 @@ definitions: return _mm256_castsi256_ps(_mm256_cmpeq_epi32(anded_vec, and_vec)); - target_extension: "avx2" ctype: ["int64_t", "uint64_t"] - lscpu_flags: ['avx', 'avx2'] + lscpu_flags: ['sse4_2', 'avx', 'avx2'] implementation: | auto const and_vec = _mm256_set_epi64x(0x8, 0x4, 0x2, 0x1); auto const data_vec = _mm256_set1_epi64x(mask); @@ -461,7 +461,7 @@ definitions: return _mm256_cmpeq_epi64(anded_vec, and_vec); - target_extension: "avx2" ctype: ["double"] - lscpu_flags: ['avx', 'avx2'] + lscpu_flags: ['sse4_2', 'avx', 'avx2'] implementation: | auto const and_vec = _mm256_set_epi64x(0x8, 0x4, 0x2, 0x1); auto const data_vec = _mm256_set1_epi64x(mask); From 1faaba02ad64267ad0e27f5d31a586f95a6d3f6a Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Thu, 21 Sep 2023 15:45:37 +0200 Subject: [PATCH 45/47] Fixed architecture flags dict --- generator/config/generator/tsl_generator_schema.yaml | 2 +- generator/expansions/tsl_cmake.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/generator/config/generator/tsl_generator_schema.yaml b/generator/config/generator/tsl_generator_schema.yaml index 097bea96..343e9a0b 100644 --- a/generator/config/generator/tsl_generator_schema.yaml +++ b/generator/config/generator/tsl_generator_schema.yaml @@ -104,7 +104,7 @@ extension: type: "dict" brief: "Dictionary for mapping architecture flags to compiler related arcitecture flags. Only non-obvious mappings must be included in this dictionary." example: "{sse4_1: 'msse4.1', sse4_2: 'msse4.2'}" - default: "{}" + default: {} includes: *includes runtime_headers: type: "list" diff --git a/generator/expansions/tsl_cmake.py b/generator/expansions/tsl_cmake.py index ec3065ce..a3e573a1 100644 --- a/generator/expansions/tsl_cmake.py +++ b/generator/expansions/tsl_cmake.py @@ -17,16 +17,20 @@ def __init__(self): @staticmethod def generate_lib(lib: TSLLib, file_generator: TSLFileGenerator, translation_units: TSLTranslationUnitContainer, cmake_config: dict) -> None: def get_architecture_flags(lib: TSLLib) -> str: + hollistic_arch_flags_dict = dict() + for extension in lib.extension_set: + arch_flags: dict = extension.data["arch_flags"] if "arch_flags" in extension.data else dict() + hollistic_arch_flags_dict.update(arch_flags) result: Set[str] = set() for primitive_definition in lib.primitive_class_set.definitions(): extension: TSLExtension = lib.extension_set.get_extension_by_name( primitive_definition.target_extension) if extension.data["needs_arch_flags"]: - arch_flags: dict = extension.data["arch_flags"] if "arch_flags" in extension.data else dict() + # arch_flags: dict = extension.data["arch_flags"] if "arch_flags" in extension.data else dict() for flag in primitive_definition.architecture_flags: f = flag - if flag in arch_flags: - f = arch_flags[flag] + if flag in hollistic_arch_flags_dict: + f = hollistic_arch_flags_dict[flag] result.add(f"{config.compiler_architecture_prefix}{f}") return " ".join(result) def get_warning_options() -> str: From ed8e301f240bc26db88aa3010e5572a451e3dadc Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Thu, 21 Sep 2023 15:53:10 +0200 Subject: [PATCH 46/47] Still fixing to_mask usage (load_mask) --- primitive_data/primitives/mask.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/primitive_data/primitives/mask.yaml b/primitive_data/primitives/mask.yaml index 5b06b554..ff8571c5 100644 --- a/primitive_data/primitives/mask.yaml +++ b/primitive_data/primitives/mask.yaml @@ -1147,7 +1147,7 @@ definitions: implementation: "return tsl::to_mask(*memory);" - target_extension: "sse" ctype: ["uint8_t", "uint16_t", "uint32_t", "uint64_t", "int8_t", "int16_t", "int32_t", "int64_t", "float", "double"] - lscpu_flags: ["sse"] + lscpu_flags: ["sse2"] implementation: "return tsl::to_mask(*memory);" ... #--- From 77dfe7e97d95ea9b4839603e554e975b53a01076 Mon Sep 17 00:00:00 2001 From: Johannes Pietrzyk Date: Mon, 25 Sep 2023 15:22:06 +0200 Subject: [PATCH 47/47] Fixed tests --- generator/core/ctrl/tsl_dependencies.py | 6 ++++++ generator/core/model/tsl_primitive.py | 3 ++- generator/expansions/tsl_unit_test.py | 9 +++++++-- primitive_data/primitives/mask.yaml | 16 ++++++++++++++++ 4 files changed, 31 insertions(+), 3 deletions(-) diff --git a/generator/core/ctrl/tsl_dependencies.py b/generator/core/ctrl/tsl_dependencies.py index 4a623322..c1c56d02 100644 --- a/generator/core/ctrl/tsl_dependencies.py +++ b/generator/core/ctrl/tsl_dependencies.py @@ -93,6 +93,12 @@ def find(self, name: str) -> NodeType|None: return node return None + def find_test(self, primitive_name: str, test_name) -> TSLDependencyGraph.PrimitiveTestNode|None: + node = TSLDependencyGraph.PrimitiveTestNode(f"{primitive_name}::{test_name}") + if node in self.__dependency_graph: + return node + return None + def traverse_by_type(self, nodes: List[NodeType], node_types_of_interest: list, reversed:bool, self_contained: bool = False) -> Generator[NodeType, None, None]: for current_node in nodes: if self_contained: diff --git a/generator/core/model/tsl_primitive.py b/generator/core/model/tsl_primitive.py index ef3c3d39..d9352d05 100644 --- a/generator/core/model/tsl_primitive.py +++ b/generator/core/model/tsl_primitive.py @@ -302,7 +302,8 @@ def has_test(self) -> bool: def tests(self) -> Generator[YamlDataType, None, None]: if self.has_test(): for test in self.declaration.data["testing"]: - yield copy.deepcopy(test) + if "implementation" in test: + yield copy.deepcopy(test) def get_tests_implementations(self, copy: bool = True) -> Generator[Tuple[str, str], None, None]: if self.has_test(): diff --git a/generator/expansions/tsl_unit_test.py b/generator/expansions/tsl_unit_test.py index 1977d129..437fc586 100644 --- a/generator/expansions/tsl_unit_test.py +++ b/generator/expansions/tsl_unit_test.py @@ -228,7 +228,7 @@ def as_dict(self) -> dict: class TSLTestSuite: @LogInit() - def __init__(self, lib: TSLLib) -> None: + def __init__(self, lib: TSLLib, dep_graph: TSLDependencyGraph) -> None: self.__test_cases: List[TSLPrimitiveTestCaseData] = [] self.__test_class_names: Set[str] = set() self.__primitive_test: Set[Tuple[str, str]] = set() @@ -243,10 +243,15 @@ def __init__(self, lib: TSLLib) -> None: primitive_definition_extension_ctype: Dict[str, List[str]] = primitive.specialization_dict() missing_primitive_definitions: Dict[str, Dict[str, List[str]]] = dict() for test in primitive.tests: + #inject dependencies from TSLDependencyGraph + test_node = dep_graph.find_test(primitive.declaration.functor_name, test["test_name"]) if test["test_name"] in test_name_dict: test_name = f"{test['test_name']}_{test_name_dict[test['test_name']]}" test_name_dict[test["test_name"]] += 1 test["test_name"] = test_name + if test_node is None: + raise ValueError(f"Does not know test {primitive.declaration.name}::{test['test_name']}") + test['requires'] = list({*test['requires'], *{dep for dep in dep_graph.get_required_primitives(test_node)}}) if ("requires" in test) and (len(test['requires']) > 0): updated_primitive_definition_extension_ctype: Dict[str, List[str]] = dict() for target_extension in primitive_definition_extension_ctype: @@ -496,7 +501,7 @@ def generate(lib: TSLLib, dep_graph: TSLDependencyGraph) -> Generator[Tuple[Path tsltu: TSLTranslationUnit = TSLTranslationUnit(target_name="tsl_test") - suite: TSLTestSuite = TSLTestSuite(lib) + suite: TSLTestSuite = TSLTestSuite(lib, dep_graph) dependency_graph: TSLTestDependencyGraph = TSLTestDependencyGraph(suite) dependency_graph.update_completeness() root_path: Path = config.generation_out_path.joinpath(unit_test_config["root_path"]) diff --git a/primitive_data/primitives/mask.yaml b/primitive_data/primitives/mask.yaml index ff8571c5..293a4f4d 100644 --- a/primitive_data/primitives/mask.yaml +++ b/primitive_data/primitives/mask.yaml @@ -500,6 +500,14 @@ definitions: auto const data_vec = _mm_set1_epi32(mask); auto const anded_vec = _mm_and_si128(data_vec, and_vec); return _mm_castsi128_ps(_mm_cmpeq_epi32(anded_vec, and_vec)); + - target_extension: ["sse"] + ctype: ["uint64_t", "int64_t"] + lscpu_flags: ["sse2"] + implementation: | + auto const and_vec = _mm_set_epi32(0x2, 0x2, 0x1, 0x1); + auto const data_vec = _mm_set1_epi32(mask); + auto const anded_vec = _mm_and_si128(data_vec, and_vec); + return _mm_cmpeq_epi32(anded_vec, and_vec); - target_extension: ["sse"] ctype: ["uint64_t", "int64_t"] lscpu_flags: ["sse2", "sse4_1"] @@ -516,6 +524,14 @@ definitions: auto const data_vec = _mm_set1_epi64x(mask); auto const anded_vec = _mm_and_si128(data_vec, and_vec); return _mm_castsi128_pd(_mm_cmpeq_epi64(anded_vec, and_vec)); + - target_extension: ["sse"] + ctype: ["double"] + lscpu_flags: ["sse2"] + implementation: | + auto const and_vec = _mm_set_epi32(0x2, 0x2, 0x1, 0x1); + auto const data_vec = _mm_set1_epi32(mask); + auto const anded_vec = _mm_and_si128(data_vec, and_vec); + return _mm_castsi128_pd(_mm_cmpeq_epi32(anded_vec, and_vec)); ... --- primitive_name: "mask_binary_not"