Commit 51c3e36
Merge branch 'fc/astral-fix-3x3' into fc/neureka-ecc-regs
FrancescoConti committed Aug 23, 2024
2 parents fe2be29 + c20c03c commit 51c3e36
Showing 6 changed files with 79 additions and 44 deletions.
2 changes: 1 addition & 1 deletion neureka/hal/neureka_task.c
@@ -169,7 +169,7 @@ void neureka_task_set_strides(neureka_task_t *task, const uint32_t k_in,
   if (task->kernel_shape == 1) { // 1x1
     task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1;
     task->data.cfg.weights_stride.d1 =
-        NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 * num_k_in;
+        (NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 / 8) * task->qw * num_k_in;
   } else if (!task->depthwise) { // 3x3
     task->data.cfg.weights_stride.d0 = NEUREKA_WEIGHT_BANDWIDTH_BYTES_3x3;
     task->data.cfg.weights_stride.d1 =
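Note on the fix above: the old 1x1 stride reserved a full weight-bandwidth slot per input-channel subtile, while the new one scales with the weight bit-width task->qw. A minimal Python sketch of the two formulas; the 32-byte bandwidth value is an assumption for illustration, not the actual HAL macro value.

# Sketch of the 1x1 weight-stride fix. NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1
# is assumed to be 32 here purely for illustration; the real value is
# defined in the Neureka HAL headers.
NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 = 32

def stride_d1_old(num_k_in):
    # Old: one full bandwidth slot per input-channel subtile.
    return NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 * num_k_in

def stride_d1_new(qw, num_k_in):
    # New: scale by the weight bit-width qw, so sub-byte weights
    # advance by proportionally smaller strides.
    return (NEUREKA_WEIGHT_BANDWIDTH_BYTES_1x1 // 8) * qw * num_k_in

assert stride_d1_old(2) == stride_d1_new(8, 2)       # identical for 8-bit weights
assert stride_d1_new(4, 2) == stride_d1_old(2) // 2  # half the stride for 4-bit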
39 changes: 35 additions & 4 deletions test/NeuralEngineFunctionalModel.py
@@ -2,13 +2,23 @@

 import torch
 import torch.nn.functional as F
+import numpy as np

 from TestClasses import IntegerType, Padding, Stride


 class NeuralEngineFunctionalModel:
     ACCUMULATOR_TYPE = IntegerType(name="int32")

+    @staticmethod
+    def _tensor_to_hex(tensor):
+        int_tensor = np.asarray(torch.floor(tensor).to(torch.int64))
+        int_tensor[int_tensor < 0] = 0xffffffff + (int_tensor[int_tensor < 0] + 1)
+        hex_tensor = np.empty(int_tensor.shape, dtype=object)
+        for idx in np.ndindex(int_tensor.shape):
+            hex_tensor[idx] = hex(int_tensor[idx].item())
+        return hex_tensor
+
     @staticmethod
     def _cast(
         tensor: torch.Tensor, _type: IntegerType, saturate: bool = False
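The new _tensor_to_hex helper renders tensors as 32-bit two's-complement hex strings, which is handy for diffing against hardware register or memory dumps. A self-contained sketch of the same conversion:

# Standalone sketch of the conversion _tensor_to_hex performs: negative
# values are remapped to their 32-bit two's-complement bit pattern.
import numpy as np
import torch

t = torch.tensor([[-1.0, -2.0], [255.0, 0.0]])
i = np.asarray(torch.floor(t).to(torch.int64))
i[i < 0] = 0xffffffff + (i[i < 0] + 1)  # same as masking with 0xFFFFFFFF
print([hex(v) for v in i.flatten().tolist()])
# ['0xffffffff', '0xfffffffe', '0xff', '0x0']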
@@ -36,7 +46,10 @@ def _norm_quant(

         if verbose:
             print("INTERMEDIATE RESULTS (after scale):")
-            print(tensor)
+            current_threshold = np.get_printoptions()['threshold']
+            np.set_printoptions(threshold=np.inf)
+            print(NeuralEngineFunctionalModel._tensor_to_hex(tensor))
+            np.set_printoptions(threshold=current_threshold)

         if has_bias:
             assert bias is not None
@@ -54,7 +67,10 @@

         if verbose:
             print("INTERMEDIATE RESULTS (after bias):")
-            print(tensor)
+            current_threshold = np.get_printoptions()['threshold']
+            np.set_printoptions(threshold=np.inf)
+            print(NeuralEngineFunctionalModel._tensor_to_hex(tensor))
+            np.set_printoptions(threshold=current_threshold)

         if has_relu:
             tensor = F.relu(tensor)
@@ -63,7 +79,10 @@

         if verbose:
             print("INTERMEDIATE RESULTS (after shift):")
-            print(tensor)
+            current_threshold = np.get_printoptions()['threshold']
+            np.set_printoptions(threshold=np.inf)
+            print(NeuralEngineFunctionalModel._tensor_to_hex(tensor))
+            np.set_printoptions(threshold=current_threshold)

         # Saturate into out_type
         tensor = NeuralEngineFunctionalModel._cast(tensor, out_type, saturate=True)
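All three verbose branches repeat the same save/set/restore dance around NumPy's print threshold. A possible follow-up (not part of this commit) is to factor it into a context manager, sketched below; recent NumPy also ships np.printoptions for exactly this.

# Sketch of a context manager equivalent to the repeated
# save/set/restore pattern above. Hypothetical helper, not in the repo.
import contextlib
import numpy as np

@contextlib.contextmanager
def full_printout():
    saved = np.get_printoptions()['threshold']
    np.set_printoptions(threshold=np.inf)
    try:
        yield
    finally:
        np.set_printoptions(threshold=saved)

# with full_printout():
#     print(NeuralEngineFunctionalModel._tensor_to_hex(tensor))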
@@ -102,6 +121,15 @@ def convolution(
             0,
         )

+        if verbose:
+            print("INPUTS (padded):")
+            current_threshold = np.get_printoptions()['threshold']
+            np.set_printoptions(threshold=np.inf)
+            print(NeuralEngineFunctionalModel._tensor_to_hex(input_padded))
+            print("WEIGHTS (padded):")
+            print(NeuralEngineFunctionalModel._tensor_to_hex(weight))
+            np.set_printoptions(threshold=current_threshold)
+
         # Accumulators are 32bit non-saturating.
         # Calculate in higher precision (int64)
         output = F.conv2d(
@@ -118,7 +146,10 @@

         if verbose:
             print("INTERMEDIATE RESULTS (pre-normalization/requant):")
-            print(output)
+            current_threshold = np.get_printoptions()['threshold']
+            np.set_printoptions(threshold=np.inf)
+            print(NeuralEngineFunctionalModel._tensor_to_hex(output))
+            np.set_printoptions(threshold=current_threshold)

         if has_norm_quant:
             assert scale is not None
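As the comment in the hunk above notes, the accumulators are 32-bit and non-saturating, so the reference convolution runs in int64 and the wrap-around is applied afterwards (presumably via _cast with saturate=False). A sketch of that wrap-around, assuming two's-complement semantics:

# Sketch: emulate a 32-bit non-saturating (wrapping) accumulator from an
# int64 result, assuming two's-complement wrap-around semantics.
import torch

acc64 = torch.tensor([2**31, -2**31 - 1, 123], dtype=torch.int64)
wrapped = ((acc64 + 2**31) % 2**32) - 2**31
print(wrapped)  # tensor([-2147483648,  2147483647,  123])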
11 changes: 2 additions & 9 deletions test/NeurekaMemoryLayout.py
@@ -88,15 +88,8 @@ def weightEncode(
         elif height == 1 and width == 1:
             # (cout * cinMajor, Bits * cinSubtile)
             weight = weight.reshape(-1, bits * cinSubtile)
-            # Pad only the last dimension to weight bandwidth size
-            # (-1, Weight Bandwidth)
-            weight = np.pad(
-                weight,
-                ((0, 0), (0, NeurekaMemoryLayout._WEIGHT_BANDWIDTH_1x1 - weight.shape[-1])),
-                "constant",
-                constant_values=0,
-            )
-            weightBandwidthBytes = int(np.ceil(NeurekaMemoryLayout._WEIGHT_BANDWIDTH_1x1 / 8))
+            # No padding needed here
+            weightBandwidthBytes = int(np.ceil(bits * cinSubtile / 8))

         # Prepare for packing
         # (-1, Weight Bandwidth Bytes, 8)
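With the padding gone, a 1x1 weight row is exactly ceil(bits * cinSubtile / 8) bytes, so sub-byte weight types shrink the layout instead of being padded back up to the full weight bandwidth. A quick check, using a hypothetical cinSubtile of 32 (see NeurekaMemoryLayout for the real constant):

# Row size of the 1x1 layout after the change. cinSubtile = 32 is an
# assumption for illustration; the real constant is in NeurekaMemoryLayout.
import numpy as np

cinSubtile = 32
for bits in (8, 4, 2):
    print(bits, "bit ->", int(np.ceil(bits * cinSubtile / 8)), "bytes per row")
# 8 bit -> 32 bytes, 4 bit -> 16 bytes, 2 bit -> 8 bytes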
2 changes: 1 addition & 1 deletion test/NeurekaTestConf.py
@@ -65,7 +65,7 @@ def check_valid_out_type(cls, v: IntegerType) -> IntegerType:
     @field_validator("weight_type")
     @classmethod
     def check_valid_weight_type(cls, v: IntegerType) -> IntegerType:
-        NeurekaTestConf._check_type("weight_type", v, ["int8"])
+        NeurekaTestConf._check_type("weight_type", v, ["int8", "int7", "int6", "int5", "int4", "int3", "int2"])
         return v

     @field_validator("scale_type")
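The validator now admits every signed width from int2 to int8, matching the sub-byte weight support added above. A standalone sketch of the whitelist pattern (_check_type itself is not shown in this diff, so the exact error message here is an assumption):

# Standalone sketch of the whitelist-style validation; the real check
# goes through NeurekaTestConf._check_type, whose body is not in this diff.
ALLOWED_WEIGHT_TYPES = ["int8", "int7", "int6", "int5", "int4", "int3", "int2"]

def check_valid_weight_type(name):
    if name not in ALLOWED_WEIGHT_TYPES:
        raise ValueError(f"weight_type: invalid type {name}")
    return name

check_valid_weight_type("int4")    # passes
# check_valid_weight_type("uint8") # would raise ValueError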
53 changes: 39 additions & 14 deletions test/NnxTestClasses.py
@@ -48,6 +48,8 @@ class NnxTestConf(BaseModel):
     has_norm_quant: bool
     has_bias: bool
     has_relu: bool
+    synthetic_weights: bool
+    synthetic_inputs: bool

     @model_validator(mode="after")  # type: ignore
     def check_valid_depthwise_channels(self) -> NnxTestConf:
@@ -116,6 +118,8 @@ def __init__(
         scale: Optional[torch.Tensor] = None,
         bias: Optional[torch.Tensor] = None,
         global_shift: Optional[torch.Tensor] = torch.Tensor([0]),
+        synthetic_weights: Optional[bool] = False,
+        synthetic_inputs: Optional[bool] = False,
     ) -> None:
         self.conf = conf
         self.input = input
@@ -124,6 +128,8 @@ def __init__(
         self.scale = scale
         self.bias = bias
         self.global_shift = global_shift
+        self.synthetic_weights = synthetic_weights
+        self.synthetic_inputs = synthetic_inputs

     def is_valid(self) -> bool:
         return all(
@@ -207,7 +213,11 @@ def _calculate_global_shift(
         """Calculate global shift so that the output values are in the range of out_type"""
         s = tensor.type(torch.float64).std()
         target_s = 2 ** (out_type._bits - 1)
-        return torch.ceil(torch.log2(s / target_s)).type(torch.int32)
+        shift = torch.ceil(torch.log2(s / target_s)).type(torch.int32)
+        if shift < 1:
+            return torch.zeros((1,)).type(torch.int32)
+        else:
+            return shift

     @staticmethod
     def _random_data(_type: IntegerType, shape: Tuple, extremes: Tuple = None):
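The shift is ceil(log2(std / target)) and is now clamped to zero from below; a negative global shift would amplify rather than attenuate the accumulator values. Worked numbers, assuming an int8 output type so target_s = 128:

# Worked example of the clamped global-shift computation, assuming an
# int8 output type (target_s = 2 ** (8 - 1) = 128).
import torch

def global_shift(std, out_bits=8):
    target_s = 2 ** (out_bits - 1)
    shift = torch.ceil(torch.log2(torch.tensor(std) / target_s)).type(torch.int32)
    return torch.zeros((1,)).type(torch.int32) if shift < 1 else shift

print(global_shift(4096.0))  # std/target = 32, log2 = 5 -> shift 5
print(global_shift(64.0))    # std/target = 0.5, log2 = -1 -> clamped to 0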
@@ -243,20 +253,30 @@ def from_conf(
             bias_shape = (1, conf.out_channel, 1, 1)

         if input is None:
-            input = NnxTestGenerator._random_data(
-                _type=conf.in_type,
-                shape=input_shape,
-            )
+            if conf.synthetic_inputs:
+                input = torch.zeros((1, conf.in_channel, conf.in_height, conf.in_width), dtype=torch.int64)
+                for i in range(conf.in_channel):
+                    input[:, i, 0, 0] = i
+            else:
+                input = NnxTestGenerator._random_data(
+                    _type=conf.in_type,
+                    shape=input_shape,
+                )

         if weight is None:
-            weight_mean = NnxTestGenerator._DEFAULT_WEIGHT_MEAN
-            weight_std = NnxTestGenerator._DEFAULT_WEIGHT_STDEV * (1 << (conf.weight_type._bits - 1) - 1)
-            weight = NnxTestGenerator._random_data_normal(
-                mean=weight_mean,
-                std=weight_std,
-                _type=conf.weight_type,
-                shape=weight_shape,
-            )
+            if conf.synthetic_weights:
+                weight = torch.zeros((conf.out_channel, 1 if conf.depthwise else conf.in_channel, conf.kernel_shape.height, conf.kernel_shape.width), dtype=torch.int64)
+                for i in range(0, min(weight.shape[0], weight.shape[1])):
+                    weight[i, i, 0, 0] = 1
+            else:
+                weight_mean = NnxTestGenerator._DEFAULT_WEIGHT_MEAN
+                weight_std = NnxTestGenerator._DEFAULT_WEIGHT_STDEV * (1 << (conf.weight_type._bits - 1) - 1)
+                weight = NnxTestGenerator._random_data_normal(
+                    mean=weight_mean,
+                    std=weight_std,
+                    _type=conf.weight_type,
+                    shape=weight_shape,
+                )

         if conf.has_norm_quant:
             if scale is None:
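The synthetic patterns are chosen so results are trivially predictable: each input channel carries its own index at pixel (0, 0) and the weights form an identity across channels, so output channel i should read back i. A self-contained check of that property for a 1x1, non-depthwise case:

# Check of the synthetic-pattern property for a 1x1 non-depthwise layer
# with 4 input and 4 output channels.
import torch
import torch.nn.functional as F

c = 4
x = torch.zeros((1, c, 3, 3), dtype=torch.int64)
for i in range(c):
    x[:, i, 0, 0] = i                 # channel index at pixel (0, 0)

w = torch.zeros((c, c, 1, 1), dtype=torch.int64)
for i in range(c):
    w[i, i, 0, 0] = 1                 # identity across channels

y = F.conv2d(x.double(), w.double()).long()  # conv2d needs float inputs
print(y[0, :, 0, 0])                  # tensor([0, 1, 2, 3])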
@@ -306,6 +326,8 @@ def from_conf(
             scale=scale,
             bias=bias,
             global_shift=global_shift,
+            synthetic_inputs=conf.synthetic_inputs,
+            synthetic_weights=conf.synthetic_weights,
         )

     @staticmethod
@@ -361,7 +383,10 @@ def generate(self, test_name: str, test: NnxTest):
         weight_type = test.conf.weight_type
         weight_bits = weight_type._bits
         assert weight_bits > 1 and weight_bits <= 8
-        weight_offset = -(2 ** (weight_bits - 1))
+        if test.synthetic_weights:
+            weight_offset = 0
+        else:
+            weight_offset = -(2 ** (weight_bits - 1))
         weight_out_ch, weight_in_ch, weight_ks_h, weight_ks_w = test.weight.shape
         weight_data: np.ndarray = test.weight.numpy() - weight_offset
         weight_init = self.weightEncode(
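Context for the weight_offset change: signed weights are normally rebased into an unsigned range before encoding (an int8 weight w is stored as w + 128), but the synthetic 0/1 identity weights are meant to reach memory verbatim, hence the zero offset. A small sketch of both paths:

# Sketch of the offset handling in generate(): signed weights are shifted
# into an unsigned range before encoding, except for synthetic weights.
import numpy as np

def apply_offset(weight, bits, synthetic):
    weight_offset = 0 if synthetic else -(2 ** (bits - 1))
    return weight - weight_offset

print(apply_offset(np.array([-128, -1, 0, 127]), 8, synthetic=False))  # [  0 127 128 255]
print(apply_offset(np.array([0, 1]), 8, synthetic=True))               # [0 1]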
16 changes: 1 addition & 15 deletions test/testgen.py
@@ -86,21 +86,7 @@ def test_gen(
         exit(-1)

     test_conf = nnxTestConfCls.model_validate(test_conf_dict)
-    if test_conf_dict['synthetic_weights']:
-        import torch
-        weight = torch.zeros((test_conf.out_channel, 1 if test_conf.depthwise else test_conf.in_channel, test_conf.kernel_shape.height, test_conf.kernel_shape.width), dtype=torch.int64)
-        for i in range(0, min(weight.shape[0], weight.shape[1])):
-            weight[i, i, 0, 0] = 1
-    else:
-        weight = None
-    if test_conf_dict['synthetic_inputs']:
-        import torch
-        inputs = torch.zeros((1, test_conf.in_channel, test_conf.in_height, test_conf.in_width), dtype=torch.int64)
-        for i in range(test_conf.in_channel):
-            inputs[:, i, 0, 0] = i
-    else:
-        inputs = None
-    test = NnxTestGenerator.from_conf(test_conf, verbose=args.print_tensors, weight=weight, input=inputs)
+    test = NnxTestGenerator.from_conf(test_conf, verbose=args.print_tensors)
     if not args.skip_save:
         test.save(args.test_dir)
     if args.headers:
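After this cleanup, testgen.py no longer special-cases synthetic tensors; NnxTestGenerator.from_conf builds them from the two conf flags. A self-contained sketch of the input-side logic that moved; the randint fallback is a stand-in for _random_data, whose actual range depends on the configured type:

# Sketch of the input-generation dispatch now living in from_conf. The
# randint fallback stands in for NnxTestGenerator._random_data.
import torch

def build_input(synthetic, channels, height, width):
    if synthetic:
        x = torch.zeros((1, channels, height, width), dtype=torch.int64)
        for i in range(channels):
            x[:, i, 0, 0] = i
        return x
    return torch.randint(-128, 128, (1, channels, height, width), dtype=torch.int64)

print(build_input(True, 4, 2, 2)[0, :, 0, 0])  # tensor([0, 1, 2, 3])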
