Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OPEN: FP integration (v2) #12

Merged
merged 20 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ jobs:
testRequantizedDWConv
test2DRequantizedConv
iSoftmax
FloatAdder

generic-models:
uses: ./.github/workflows/TestRunnerGeneric.yml
Expand Down Expand Up @@ -149,6 +150,7 @@ jobs:
Hardswish
RQHardswish
testBacktracking
FloatAdder
num-cores: 8

siracusa-models:
Expand Down
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,13 @@
### Fixed
- Update the link of the Docker container used to run the CI with the Docker published by this repo instead of my fork.
- Add a retry on timeout step for large network tests. This is a temporary fix to address the sporadic freeze happening at the compilation stage, see [this issue](https://github.com/pulp-platform/Deeploy/issues/9).

## Floating Point Support

### Added
- Add the `FloatImmediate` `AbstractType`
- Define fp64, fp32, fp16, and bf16
- Add float binding for the Adder in the Generic platform
- Add a FloatAdder test to the CI for Siracusa and Generic platforms
- Extend `testType.py` with float tests
- LIMITATION: The current LLVM compiler does not support bf16 and fp16; these types are commented out in the library header
2 changes: 2 additions & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ All contributors have agreed to an open-source release of their work in the Deep
* Victor Jung
* Philip Wiese
* Luka Macan
* Alberto Dequino
* Francesco Conti
69 changes: 69 additions & 0 deletions Deeploy/AbstractDataTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from __future__ import annotations

import copy
import math
from abc import abstractmethod
from dataclasses import dataclass
from typing import Dict, Generic, Iterable, List, Optional, Type, TypeVar, Union
Expand Down Expand Up @@ -234,6 +235,74 @@ def checkValue(cls, value: Union[int, Iterable[int]], ctxt: Optional[_NetworkCon
return True


class FloatImmediate(Immediate[Union[float, Iterable[float]], _ImmediateType]):
    """Immediate representing a value in a reduced-precision IEEE-754-style
    floating point format, described by its mantissa and exponent widths."""

    typeMantissa: int  # Number of bits reserved for the mantissa part
    typeExponent: int  # Number of bits reserved for the exponent part

    @_classproperty
    def typeExponentMax(cls) -> int:
        # The all-ones exponent pattern is reserved for special numbers
        # (NaN / Inf), hence the -2 instead of -1.
        return 2**(cls.typeExponent) - 2

    @_classproperty
    def typeExponentOffset(cls) -> int:
        # IEEE-754-style exponent bias added to the unbiased exponent.
        return 2**(cls.typeExponent - 1) - 1

    @classmethod
    def partialOrderUpcast(cls, otherCls: Type[Immediate]) -> bool:
        # A float format can represent another one iff both its mantissa and
        # its exponent fields are at least as wide.
        return issubclass(otherCls, FloatImmediate) \
            and cls.typeMantissa >= otherCls.typeMantissa \
            and cls.typeExponent >= otherCls.typeExponent

    @classmethod
    def checkValue(cls, value: Union[float, Iterable[float]], ctxt: Optional[_NetworkContext] = None):
        """Return True iff every given Python float (FP64) survives the cast
        to this (narrower) floating point representation without any loss,
        i.e. both its exponent and its mantissa are exactly representable."""

        if isinstance(value, float):
            scalars = [value]
        elif isinstance(value, np.ndarray):
            scalars = value.tolist()
        elif isinstance(value, Iterable):
            scalars = list(value)
        else:
            raise Exception("Immediate type not recognized.")

        # Smallest normal FP64 exponent: the FP64 exponent field has 11 bits,
        # so the bias is 2**(11-1)-1 and anything at or below -1023 is denormal.
        DOUBLE_MIN_EXP = -1023

        for scalar in scalars:

            # frexp yields scalar = frac * 2**exp with |frac| in [0.5, 1);
            # rescale to the IEEE-754 normal form m * 2**e with m in [1, 2).
            frac, exp = math.frexp(scalar)
            frac = math.fabs(frac) * 2
            exp -= 1

            # Zero, Inf, NaN, and denormals are deliberately not validated.
            if not math.isfinite(scalar) or scalar == 0 or exp <= DOUBLE_MIN_EXP:
                continue

            # The biased exponent must fit into the target exponent field.
            biasedExp = cls.typeExponentOffset + exp
            if biasedExp < 0 or biasedExp > cls.typeExponentMax:
                return False

            # Truncate the fractional part of the mantissa to typeMantissa
            # bits (implicit assumption: typeMantissa < 52, as in FP64) and
            # require the truncation to be lossless.
            scale = 2**cls.typeMantissa
            truncated = 1 + math.floor(scale * (frac - 1)) / scale
            if truncated != frac:
                return False

        return True


class Pointer(BaseType[Optional[str], _PointerType]):
"""Represents a C Pointer type to an underlying BaseType data type
"""
Expand Down
31 changes: 30 additions & 1 deletion Deeploy/CommonExtensions/DataTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from typing import Tuple, Type

from Deeploy.AbstractDataTypes import IntegerImmediate
from Deeploy.AbstractDataTypes import FloatImmediate, IntegerImmediate


class int8_t(IntegerImmediate):
Expand Down Expand Up @@ -76,10 +76,39 @@ class uint64_t(IntegerImmediate):
signed = False


class bfloat16_t(FloatImmediate):
    # bfloat16: 16 bits total = 1 sign + 8 exponent + 7 explicit mantissa bits.
    typeName = "bfloat16_t"
    typeWidth = 16
    typeMantissa = 7
    typeExponent = 8

class float16_t(FloatImmediate):
    # IEEE-754 binary16: 16 bits total = 1 sign + 5 exponent + 10 explicit mantissa bits.
    typeName = "float16_t"
    typeWidth = 16
    typeMantissa = 10
    typeExponent = 5


class float32_t(FloatImmediate):
    # IEEE-754 binary32: 32 bits total = 1 sign + 8 exponent + 23 explicit mantissa bits.
    typeName = "float32_t"
    typeWidth = 32
    typeMantissa = 23
    typeExponent = 8


class float64_t(FloatImmediate):
    # IEEE-754 binary64: 64 bits total = 1 sign + 11 exponent + 52 explicit mantissa bits.
    typeName = "float64_t"
    typeWidth = 64
    typeMantissa = 52
    typeExponent = 11


SignedIntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (int8_t, int16_t, int32_t, int64_t)
UnsignedIntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (uint8_t, uint16_t, uint32_t, uint64_t)
# Sorted by ascending type width. Note: sorted() returns a list, so it must be
# wrapped in tuple() for the value to actually match the declared annotation.
IntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = tuple(
    sorted((
        *SignedIntegerDataTypes,
        *UnsignedIntegerDataTypes,
    ),
           key = lambda _type: _type.typeWidth))
FloatDataTypes: Tuple[Type[FloatImmediate], ...] = (bfloat16_t, float16_t, float32_t, float64_t)
7 changes: 6 additions & 1 deletion Deeploy/DeeployTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from onnx.external_data_helper import convert_model_to_external_data
from ortools.constraint_solver.pywrapcp import IntVar

from .AbstractDataTypes import BaseType, IntegerImmediate, Pointer, PointerClass, Struct, VoidType
from .AbstractDataTypes import BaseType, FloatImmediate, IntegerImmediate, Pointer, PointerClass, Struct, VoidType

Shape = TypeVar("Shape", bound = Any)
SubGraph = List[gs.Node]
Expand Down Expand Up @@ -1903,11 +1903,16 @@ def _broadcastInteger(ty: Type[IntegerImmediate]):
else:
return np.dtype(getattr(np, "uint" + str(ty.typeWidth)))

def _broadcastFloat(ty: Type[FloatImmediate]):
return np.dtype(getattr(np, "double"))

if issubclass(ty, Pointer) and hasattr(ty, "referencedType"):
if issubclass(ty.referencedType, IntegerImmediate):
return _broadcastInteger(ty.referencedType)
elif issubclass(ty, IntegerImmediate):
return _broadcastInteger(ty)
elif issubclass(ty, FloatImmediate):
return _broadcastFloat(ty)

return None

Expand Down
12 changes: 8 additions & 4 deletions Deeploy/Targets/Generic/Bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,18 @@
from Deeploy.AbstractDataTypes import PointerClass
from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \
MemoryManagementGeneration, MemoryPassthroughGeneration
from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, SignedIntegerDataTypes, int8_t, int32_t, uint8_t
from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, int8_t, \
int32_t, uint8_t
from Deeploy.DeeployTypes import CodeTransformation, NodeBinding
from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration
from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, ConvTemplate, DebugPrintTemplate, \
DummyTemplate, DWConvTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, \
DummyTemplate, DWConvTemplate, FloatAddTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, \
ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, ReduceMeanTemplate, \
ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, \
TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DebugPrintChecker, \
DummyChecker, GatherChecker, GELUChecker, GEMMChecker, IntegerDivChecker, MatMulChecker, MaxPoolChecker, \
MulChecker, PadChecker, ReduceMeanChecker, ReduceSumChecker, RequantShiftChecker, ReshapeChecker, \
DummyChecker, FloatAddChecker, GatherChecker, GELUChecker, GEMMChecker, IntegerDivChecker, MatMulChecker, \
MaxPoolChecker, MulChecker, PadChecker, ReduceMeanChecker, ReduceSumChecker, RequantShiftChecker, ReshapeChecker, \
RQIntegerDivChecker, SliceChecker, SoftmaxChecker, TransposeChecker, iLayerNormChecker

BasicTransformer = CodeTransformation([ArgumentStructGeneration(), MemoryManagementGeneration(), FutureGeneration()])
Expand All @@ -65,6 +66,9 @@
AddTemplate.referenceTemplate, BasicTransformer)
for type1 in IntegerDataTypes
for type2 in IntegerDataTypes
] + [
NodeBinding(FloatAddChecker([PointerClass(type), PointerClass(type)], [PointerClass(type)]),
FloatAddTemplate.referenceTemplate, BasicTransformer) for type in FloatDataTypes
]

BasicConv1DBinding = NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]),
Expand Down
52 changes: 52 additions & 0 deletions Deeploy/Targets/Generic/Templates/FloatAddTemplate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# ----------------------------------------------------------------------
#
# File: FloatAddTemplate.py
#
# Last edited: 13.11.2024
#
# Copyright (C) 2021, ETH Zurich and University of Bologna.
#
# Authors:
# - Francesco Conti, UNIBO
# - Alberto Dequino, UNIBO
#
# ----------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict, List, Tuple

from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation


class _FloatAddTemplate(NodeTemplate):
    """Template for an element-wise float addition node."""

    def alignToContext(self, ctxt: NetworkContext,
                       operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]:
        # Resolve the node's tensors from the context. The results are unused;
        # presumably the lookups act as a presence check for the buffers.
        # TODO(review): confirm the lookups are intentional validation.
        for tensorKey in ('data_in_1', 'data_in_2', 'data_out'):
            ctxt.lookup(operatorRepresentation[tensorKey])

        return ctxt, operatorRepresentation, []


referenceTemplate = _FloatAddTemplate("""
// Add (Name: ${nodeName}, Op: ${nodeOp})
BEGIN_SINGLE_CORE
for (uint32_t i=0;i<${size};i++){
${data_out}[i] = ${data_in_1}[i] + ${data_in_2}[i];
}
END_SINGLE_CORE
""")
14 changes: 14 additions & 0 deletions Deeploy/Targets/Generic/TypeCheckers.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,20 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
return [False]


class FloatAddChecker(SignPropTypeChecker):
    """Type checker for the float Add operator."""

    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
        super().__init__(input_types, output_types)

    def _inferNumLevels(self, inputs: List[VariableBuffer],
                        operatorRepresentation: OperatorRepresentation) -> List[int]:
        # An addition can at most span the sum of both operands' level counts.
        lhs = inputs[0]
        rhs = inputs[1]
        return [lhs.nLevels + rhs.nLevels]

    def _inferSignedness(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[bool]:
        # Floating point outputs are always treated as signed.
        return [True]


class GatherChecker(SignPropTypeChecker):

def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
Expand Down
Binary file added DeeployTest/Tests/FloatAdder/activations.npz
Binary file not shown.
Binary file added DeeployTest/Tests/FloatAdder/inputs.npz
Binary file not shown.
30 changes: 30 additions & 0 deletions DeeployTest/Tests/FloatAdder/network.onnx
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
onnxruntime.transformers1.16.1:¨
(
input
onnx::Add_1outputAdd_0"Addtorch-jit-exportZ
input


Z
onnx::Add_1


b
output


j
output


B
B

ai.onnx.mlB
ai.onnx.trainingB
com.ms.internal.nhwcB
ai.onnx.preview.trainingB
com.microsoftB
com.microsoft.experimentalB
com.microsoft.nchwcB
org.pytorch.aten
Expand Down
Binary file added DeeployTest/Tests/FloatAdder/outputs.npz
Binary file not shown.
6 changes: 3 additions & 3 deletions DeeployTest/generateNetwork.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@
test_inputs, test_outputs, graph = generateDebugConfig(inputs, outputs, activations, graph)

else:
# Load as int64 and infer types later
test_inputs = [inputs[x].reshape(-1).astype(np.int64) for x in inputs.files]
test_outputs = [outputs[x].reshape(-1).astype(np.int64) for x in outputs.files]
# Load as float64 and infer types later
test_inputs = [inputs[x].reshape(-1).astype(np.float64) for x in inputs.files]
test_outputs = [outputs[x].reshape(-1).astype(np.float64) for x in outputs.files]

# WIESEP: Hack to get CI running because only one specific array is used
if "WaveFormer" in args.dir:
Expand Down
29 changes: 28 additions & 1 deletion DeeployTest/testTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import pytest

from Deeploy.AbstractDataTypes import PointerClass, StructClass
from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, int8_t, int16_t, int32_t
from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, bfloat16_t, float32_t, int8_t, int16_t, int32_t
from Deeploy.DeeployTypes import ConstantBuffer, NetworkContext, StructBuffer, TransientBuffer, VariableBuffer


Expand Down Expand Up @@ -93,6 +93,32 @@ def testImmediatePromotion():
return True


def testImmediatePromotionFloat():
    """Check that float immediates accept exactly representable values and
    reject values that would lose precision in the target format.

    NOTE: `pytest.raises` only verifies that the block raises at some point —
    statements after the first raising one never execute. The original version
    grouped several expected failures under a single context manager, so only
    the first of each group was actually tested. Each expected failure now
    gets its own `pytest.raises` block.
    """
    # Values that are NOT exactly representable must be rejected.
    with pytest.raises(Exception):
        _ = bfloat16_t(0.1)  # 0.1 has no finite binary expansion
    with pytest.raises(Exception):
        _ = bfloat16_t(7777777.0)  # needs more mantissa bits than bf16 offers
    with pytest.raises(Exception):
        _ = bfloat16_t(0.2)
    with pytest.raises(Exception):
        _ = float32_t(77777777.0)  # needs more mantissa bits than fp32 offers
    with pytest.raises(Exception):
        _ = float32_t(0.0000800006853044033050537109375)
    with pytest.raises(Exception):
        # checkValue only recognizes float / ndarray / iterables; a bare int
        # is rejected. TODO(review): confirm ints should not be coerced.
        _ = bfloat16_t(7777777)

    # Exactly representable values must promote without raising.
    a = bfloat16_t(12.375)
    b = bfloat16_t(0.5)
    c = float32_t(7777777.0)
    d = float32_t(77777776.0)

    e = float32_t(0.0000900006853044033050537109375)

    _ = bfloat16_t(0.000079631805419921875)

    # Downcasting fp32 immediates that do not fit into bf16 must be rejected.
    with pytest.raises(Exception):
        _ = bfloat16_t(c)
    with pytest.raises(Exception):
        _ = bfloat16_t(d)
    with pytest.raises(Exception):
        _ = bfloat16_t(e)
    with pytest.raises(Exception):
        _ = bfloat16_t(0.000079631805419921885)

    return True


def generateTestStruct() -> StructClass:
testStructType = {"f1": int32_t, "f2": int8_t}
s1 = StructClass("s2", testStructType)
Expand Down Expand Up @@ -223,6 +249,7 @@ def testPointerTypeEquivalence():
testImmediateSerialization()
testImmediatePromotion()
testImmediateTypeEquivalence()
testImmediatePromotionFloat()

testStructSerialization()
testStructPromotion()
Expand Down
Loading
Loading