Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OPEN: FP integration (v2) #12

Merged
merged 20 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ jobs:
testRequantizedDWConv
test2DRequantizedConv
iSoftmax
FloatAdder

generic-models:
uses: ./.github/workflows/TestRunnerGeneric.yml
Expand Down Expand Up @@ -149,6 +150,7 @@ jobs:
Hardswish
RQHardswish
testBacktracking
FloatAdder
num-cores: 8

siracusa-models:
Expand Down
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,13 @@
### Fixed
- Update the link of the Docker container used to run the CI with the Docker published by this repo instead of my fork.
- Add a retry on timeout step for large network tests. This is a temporary fix to address the sporadic freeze happening at the compilation stage, see [this issue](https://github.com/pulp-platform/Deeploy/issues/9).

## Floating Point Support

### Added
- Add the `FloatImmediate` `AbstractType`
- Define fp64, fp32, fp16, and bf16
- Add float binding for the Adder in the Generic platform
- Add a FloatAdder test to the CI for Siracusa and Generic platforms
- Extend `testType.py` with float tests
- LIMITATION: The current LLVM compiler does not support bf16 and fp16; these types are commented out in the library header
2 changes: 2 additions & 0 deletions CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ All contributors have agreed to an open-source release of their work in the Deep
* Victor Jung
* Philip Wiese
* Luka Macan
* Alberto Dequino
* Francesco Conti
69 changes: 69 additions & 0 deletions Deeploy/AbstractDataTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from __future__ import annotations

import copy
import math
from abc import abstractmethod
from dataclasses import dataclass
from typing import Dict, Generic, Iterable, List, Optional, Type, TypeVar, Union
Expand Down Expand Up @@ -234,6 +235,74 @@ def checkValue(cls, value: Union[int, Iterable[int]], ctxt: Optional[_NetworkCon
return True


class FloatImmediate(Immediate[Union[float, Iterable[float]], _ImmediateType]):
    """Immediate representing a value in a reduced-precision IEEE-754-style
    floating point format, described by its mantissa and exponent widths."""

    typeMantissa: int  # Number of bits reserved for the mantissa part
    typeExponent: int  # Number of bits reserved for the exponent part

    @_classproperty
    def typeExponentMax(cls) -> int:
        # The all-ones exponent pattern is reserved for special numbers
        # (NaN / Inf), hence the -2 instead of -1.
        return 2**(cls.typeExponent) - 2

    @_classproperty
    def typeExponentOffset(cls) -> int:
        # IEEE-754-style exponent bias added to the unbiased exponent.
        return 2**(cls.typeExponent - 1) - 1

    @classmethod
    def partialOrderUpcast(cls, otherCls: Type[Immediate]) -> bool:
        # A float format can represent another one iff both its mantissa and
        # its exponent fields are at least as wide.
        return issubclass(otherCls, FloatImmediate) \
            and cls.typeMantissa >= otherCls.typeMantissa \
            and cls.typeExponent >= otherCls.typeExponent

    @classmethod
    def checkValue(cls, value: Union[float, Iterable[float]], ctxt: Optional[_NetworkContext] = None):
        """Return True iff every given Python float (FP64) survives the cast
        to this (narrower) floating point representation without any loss,
        i.e. both its exponent and its mantissa are exactly representable."""

        if isinstance(value, float):
            scalars = [value]
        elif isinstance(value, np.ndarray):
            scalars = value.tolist()
        elif isinstance(value, Iterable):
            scalars = list(value)
        else:
            raise Exception("Immediate type not recognized.")

        # Smallest normal FP64 exponent: the FP64 exponent field has 11 bits,
        # so the bias is 2**(11-1)-1 and anything at or below -1023 is denormal.
        DOUBLE_MIN_EXP = -1023

        for scalar in scalars:

            # frexp yields scalar = frac * 2**exp with |frac| in [0.5, 1);
            # rescale to the IEEE-754 normal form m * 2**e with m in [1, 2).
            frac, exp = math.frexp(scalar)
            frac = math.fabs(frac) * 2
            exp -= 1

            # Zero, Inf, NaN, and denormals are deliberately not validated.
            if not math.isfinite(scalar) or scalar == 0 or exp <= DOUBLE_MIN_EXP:
                continue

            # The biased exponent must fit into the target exponent field.
            biasedExp = cls.typeExponentOffset + exp
            if biasedExp < 0 or biasedExp > cls.typeExponentMax:
                return False

            # Truncate the fractional part of the mantissa to typeMantissa
            # bits (implicit assumption: typeMantissa < 52, as in FP64) and
            # require the truncation to be lossless.
            scale = 2**cls.typeMantissa
            truncated = 1 + math.floor(scale * (frac - 1)) / scale
            if truncated != frac:
                return False

        return True


class Pointer(BaseType[Optional[str], _PointerType]):
"""Represents a C Pointer type to an underlying BaseType data type
"""
Expand Down
31 changes: 30 additions & 1 deletion Deeploy/CommonExtensions/DataTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from typing import Tuple, Type

from Deeploy.AbstractDataTypes import IntegerImmediate
from Deeploy.AbstractDataTypes import FloatImmediate, IntegerImmediate


class int8_t(IntegerImmediate):
Expand Down Expand Up @@ -76,10 +76,39 @@ class uint64_t(IntegerImmediate):
signed = False


class bfloat16_t(FloatImmediate):
    # bfloat16: 16 bits total = 1 sign + 8 exponent + 7 explicit mantissa bits.
    typeName = "bfloat16_t"
    typeWidth = 16
    typeMantissa = 7
    typeExponent = 8

class float16_t(FloatImmediate):
    # IEEE-754 binary16: 16 bits total = 1 sign + 5 exponent + 10 explicit mantissa bits.
    typeName = "float16_t"
    typeWidth = 16
    typeMantissa = 10
    typeExponent = 5


class float32_t(FloatImmediate):
    # IEEE-754 binary32: 32 bits total = 1 sign + 8 exponent + 23 explicit mantissa bits.
    typeName = "float32_t"
    typeWidth = 32
    typeMantissa = 23
    typeExponent = 8


class float64_t(FloatImmediate):
    # IEEE-754 binary64: 64 bits total = 1 sign + 11 exponent + 52 explicit mantissa bits.
    typeName = "float64_t"
    typeWidth = 64
    typeMantissa = 52
    typeExponent = 11


SignedIntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (int8_t, int16_t, int32_t, int64_t)
UnsignedIntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = (uint8_t, uint16_t, uint32_t, uint64_t)
# Sorted by ascending type width. Note: sorted() returns a list, so it must be
# wrapped in tuple() for the value to actually match the declared annotation.
IntegerDataTypes: Tuple[Type[IntegerImmediate], ...] = tuple(
    sorted((
        *SignedIntegerDataTypes,
        *UnsignedIntegerDataTypes,
    ),
           key = lambda _type: _type.typeWidth))
FloatDataTypes: Tuple[Type[FloatImmediate], ...] = (bfloat16_t, float16_t, float32_t, float64_t)
7 changes: 6 additions & 1 deletion Deeploy/DeeployTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from onnx.external_data_helper import convert_model_to_external_data
from ortools.constraint_solver.pywrapcp import IntVar

from .AbstractDataTypes import BaseType, IntegerImmediate, Pointer, PointerClass, Struct, VoidType
from .AbstractDataTypes import BaseType, FloatImmediate, IntegerImmediate, Pointer, PointerClass, Struct, VoidType

Shape = TypeVar("Shape", bound = Any)
SubGraph = List[gs.Node]
Expand Down Expand Up @@ -1903,11 +1903,16 @@ def _broadcastInteger(ty: Type[IntegerImmediate]):
else:
return np.dtype(getattr(np, "uint" + str(ty.typeWidth)))

def _broadcastFloat(ty: Type[FloatImmediate]):
return np.dtype(getattr(np, "double"))

if issubclass(ty, Pointer) and hasattr(ty, "referencedType"):
if issubclass(ty.referencedType, IntegerImmediate):
return _broadcastInteger(ty.referencedType)
elif issubclass(ty, IntegerImmediate):
return _broadcastInteger(ty)
elif issubclass(ty, FloatImmediate):
return _broadcastFloat(ty)

return None

Expand Down
12 changes: 8 additions & 4 deletions Deeploy/Targets/Generic/Bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,18 @@
from Deeploy.AbstractDataTypes import PointerClass
from Deeploy.CommonExtensions.CodeTransformationPasses.MemoryAllocation import ArgumentStructGeneration, \
MemoryManagementGeneration, MemoryPassthroughGeneration
from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, SignedIntegerDataTypes, int8_t, int32_t, uint8_t
from Deeploy.CommonExtensions.DataTypes import FloatDataTypes, IntegerDataTypes, SignedIntegerDataTypes, int8_t, \
int32_t, uint8_t
from Deeploy.DeeployTypes import CodeTransformation, NodeBinding
from Deeploy.FutureExtension.CodeTransformationPasses.FutureCodeTransformation import FutureGeneration
from Deeploy.Targets.Generic.Templates import AddTemplate, ConcatTemplate, ConvTemplate, DebugPrintTemplate, \
DummyTemplate, DWConvTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, \
DummyTemplate, DWConvTemplate, FloatAddTemplate, GatherTemplate, GemmTemplate, IntegerDivTemplate, ITAMaxTemplate, \
ITAPartialMaxTemplate, MatMulTemplate, MaxPoolTemplate, MulTemplate, PadTemplate, ReduceMeanTemplate, \
ReduceSumTemplate, RequantShiftTemplate, ReshapeTemplate, RQIntegerDivTemplate, RQSiGELUTemplate, SliceTemplate, \
TransposeTemplate, iGELUTemplate, iLayernormTemplate, iRMSNormTemplate, iSoftmaxTemplate
from Deeploy.Targets.Generic.TypeCheckers import AddChecker, ConcatChecker, ConvChecker, DebugPrintChecker, \
DummyChecker, GatherChecker, GELUChecker, GEMMChecker, IntegerDivChecker, MatMulChecker, MaxPoolChecker, \
MulChecker, PadChecker, ReduceMeanChecker, ReduceSumChecker, RequantShiftChecker, ReshapeChecker, \
DummyChecker, FloatAddChecker, GatherChecker, GELUChecker, GEMMChecker, IntegerDivChecker, MatMulChecker, \
MaxPoolChecker, MulChecker, PadChecker, ReduceMeanChecker, ReduceSumChecker, RequantShiftChecker, ReshapeChecker, \
RQIntegerDivChecker, SliceChecker, SoftmaxChecker, TransposeChecker, iLayerNormChecker

BasicTransformer = CodeTransformation([ArgumentStructGeneration(), MemoryManagementGeneration(), FutureGeneration()])
Expand All @@ -65,6 +66,9 @@
AddTemplate.referenceTemplate, BasicTransformer)
for type1 in IntegerDataTypes
for type2 in IntegerDataTypes
] + [
NodeBinding(FloatAddChecker([PointerClass(type), PointerClass(type)], [PointerClass(type)]),
FloatAddTemplate.referenceTemplate, BasicTransformer) for type in FloatDataTypes
]

BasicConv1DBinding = NodeBinding(ConvChecker([PointerClass(int8_t), PointerClass(int8_t)], [PointerClass(int32_t)]),
Expand Down
52 changes: 52 additions & 0 deletions Deeploy/Targets/Generic/Templates/FloatAddTemplate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# ----------------------------------------------------------------------
#
# File: FloatAddTemplate.py
#
# Last edited: 13.11.2024
#
# Copyright (C) 2021, ETH Zurich and University of Bologna.
#
# Authors:
# - Francesco Conti, UNIBO
# - Alberto Dequino, UNIBO
#
# ----------------------------------------------------------------------
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the License); you may
# not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict, List, Tuple

from Deeploy.DeeployTypes import NetworkContext, NodeTemplate, OperatorRepresentation


class _FloatAddTemplate(NodeTemplate):
    """Template for an element-wise float addition node."""

    def alignToContext(self, ctxt: NetworkContext,
                       operatorRepresentation: OperatorRepresentation) -> Tuple[NetworkContext, Dict, List[str]]:
        # Resolve the node's tensors from the context. The results are unused;
        # presumably the lookups act as a presence check for the buffers.
        # TODO(review): confirm the lookups are intentional validation.
        for tensorKey in ('data_in_1', 'data_in_2', 'data_out'):
            ctxt.lookup(operatorRepresentation[tensorKey])

        return ctxt, operatorRepresentation, []


referenceTemplate = _FloatAddTemplate("""
// Add (Name: ${nodeName}, Op: ${nodeOp})
BEGIN_SINGLE_CORE
for (uint32_t i=0;i<${size};i++){
${data_out}[i] = ${data_in_1}[i] + ${data_in_2}[i];
}
END_SINGLE_CORE
""")
14 changes: 14 additions & 0 deletions Deeploy/Targets/Generic/TypeCheckers.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,20 @@ def _inferSignedness(self, inputs: List[VariableBuffer],
return [False]


class FloatAddChecker(SignPropTypeChecker):
    """Type checker for the float Add operator."""

    def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
        super().__init__(input_types, output_types)

    def _inferNumLevels(self, inputs: List[VariableBuffer],
                        operatorRepresentation: OperatorRepresentation) -> List[int]:
        # An addition can at most span the sum of both operands' level counts.
        lhs = inputs[0]
        rhs = inputs[1]
        return [lhs.nLevels + rhs.nLevels]

    def _inferSignedness(self, inputs: List[VariableBuffer],
                         operatorRepresentation: OperatorRepresentation) -> List[bool]:
        # Floating point outputs are always treated as signed.
        return [True]


class GatherChecker(SignPropTypeChecker):

def __init__(self, input_types: Sequence[Type[Pointer]], output_types: Sequence[Type[Pointer]]):
Expand Down
Binary file added DeeployTest/Tests/FloatAdder/activations.npz
Binary file not shown.
Binary file added DeeployTest/Tests/FloatAdder/inputs.npz
Binary file not shown.
30 changes: 30 additions & 0 deletions DeeployTest/Tests/FloatAdder/network.onnx
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
onnxruntime.transformers1.16.1:¨
(
input
onnx::Add_1outputAdd_0"Addtorch-jit-exportZ
input


Z
onnx::Add_1


b
output


j
output


B
B

ai.onnx.mlB
ai.onnx.trainingB
com.ms.internal.nhwcB
ai.onnx.preview.trainingB
com.microsoftB
com.microsoft.experimentalB
com.microsoft.nchwcB
org.pytorch.aten
Expand Down
Binary file added DeeployTest/Tests/FloatAdder/outputs.npz
Binary file not shown.
6 changes: 3 additions & 3 deletions DeeployTest/generateNetwork.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,9 @@
test_inputs, test_outputs, graph = generateDebugConfig(inputs, outputs, activations, graph)

else:
# Load as int64 and infer types later
test_inputs = [inputs[x].reshape(-1).astype(np.int64) for x in inputs.files]
test_outputs = [outputs[x].reshape(-1).astype(np.int64) for x in outputs.files]
# Load as float64 and infer types later
test_inputs = [inputs[x].reshape(-1).astype(np.float64) for x in inputs.files]
test_outputs = [outputs[x].reshape(-1).astype(np.float64) for x in outputs.files]

# WIESEP: Hack to get CI running because only one specific array is used
if "WaveFormer" in args.dir:
Expand Down
29 changes: 28 additions & 1 deletion DeeployTest/testTypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import pytest

from Deeploy.AbstractDataTypes import PointerClass, StructClass
from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, int8_t, int16_t, int32_t
from Deeploy.CommonExtensions.DataTypes import IntegerDataTypes, bfloat16_t, float32_t, int8_t, int16_t, int32_t
from Deeploy.DeeployTypes import ConstantBuffer, NetworkContext, StructBuffer, TransientBuffer, VariableBuffer


Expand Down Expand Up @@ -93,6 +93,32 @@ def testImmediatePromotion():
return True


def testImmediatePromotionFloat():
    """Check that float immediates accept exactly representable values and
    reject values that would lose precision in the target format.

    NOTE: `pytest.raises` only verifies that the block raises at some point —
    statements after the first raising one never execute. The original version
    grouped several expected failures under a single context manager, so only
    the first of each group was actually tested. Each expected failure now
    gets its own `pytest.raises` block.
    """
    # Values that are NOT exactly representable must be rejected.
    with pytest.raises(Exception):
        _ = bfloat16_t(0.1)  # 0.1 has no finite binary expansion
    with pytest.raises(Exception):
        _ = bfloat16_t(7777777.0)  # needs more mantissa bits than bf16 offers
    with pytest.raises(Exception):
        _ = bfloat16_t(0.2)
    with pytest.raises(Exception):
        _ = float32_t(77777777.0)  # needs more mantissa bits than fp32 offers
    with pytest.raises(Exception):
        _ = float32_t(0.0000800006853044033050537109375)
    with pytest.raises(Exception):
        # checkValue only recognizes float / ndarray / iterables; a bare int
        # is rejected. TODO(review): confirm ints should not be coerced.
        _ = bfloat16_t(7777777)

    # Exactly representable values must promote without raising.
    a = bfloat16_t(12.375)
    b = bfloat16_t(0.5)
    c = float32_t(7777777.0)
    d = float32_t(77777776.0)

    e = float32_t(0.0000900006853044033050537109375)

    _ = bfloat16_t(0.000079631805419921875)

    # Downcasting fp32 immediates that do not fit into bf16 must be rejected.
    with pytest.raises(Exception):
        _ = bfloat16_t(c)
    with pytest.raises(Exception):
        _ = bfloat16_t(d)
    with pytest.raises(Exception):
        _ = bfloat16_t(e)
    with pytest.raises(Exception):
        _ = bfloat16_t(0.000079631805419921885)

    return True


def generateTestStruct() -> StructClass:
testStructType = {"f1": int32_t, "f2": int8_t}
s1 = StructClass("s2", testStructType)
Expand Down Expand Up @@ -223,6 +249,7 @@ def testPointerTypeEquivalence():
testImmediateSerialization()
testImmediatePromotion()
testImmediateTypeEquivalence()
testImmediatePromotionFloat()

testStructSerialization()
testStructPromotion()
Expand Down
Loading
Loading