From 6d142630bc293fed719eb66903ce70211fcb860a Mon Sep 17 00:00:00 2001 From: Aba Date: Wed, 25 Oct 2023 21:27:58 -0700 Subject: [PATCH] Python - tiling all layers --- c/model.h | 24 +++--- c/runtime.h | 2 +- test/py/bundle.py | 4 +- test/py/param_test.py | 8 +- test/py/tiling.ipynb | 173 ++++++++++++++++++++---------------------- 5 files changed, 103 insertions(+), 108 deletions(-) diff --git a/c/model.h b/c/model.h index 8f0e40e..233c03a 100644 --- a/c/model.h +++ b/c/model.h @@ -1,12 +1,12 @@ #define N_BUNDLES 7 Bundle_t bundles [N_BUNDLES] = { - {.n=8, .l=2, .kw=11, .coe=2, .coe_tl=2, .r_ll=2, .h=10, .w=8, .ci=3, .co=16, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=272, .w_bpt_p0=272, .x_bpt=840, .x_bpt_p0=840, .is_bias=1, .b_offset=0, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=414341061322735616, .x_header_p0=414341061322735616, .w_header=414587437826703360, .w_header_p0=414341061322735616 }, - {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=2, .h=10, .w=8, .ci=16, .co=16, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=392, .w_bpt_p0=392, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=0, .b_offset=16, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=7, .ca_pl_scale=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210795138088960, .w_header_p0=8700964375684448256 }, - {.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=2, .h=10, .w=8, .ci=16, .co=16, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=344, .w_bpt_p0=344, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=16, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=12, .ca_pl_scale=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933027824074752, .w_header_p0=846686625550303232 }, - {.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=2, .h=10, .w=8, .ci=16, .co=16, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=488, .w_bpt_p0=488, .x_bpt=3336, .x_bpt_p0=3336, .is_bias=0, .b_offset=34, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=1927550536119222272, .x_header_p0=1927550536119222272, .w_header=1927796989932601344, .w_header_p0=1927550536119222272 }, - {.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=2, .h=10, .w=8, .ci=16, .co=24, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=440, .w_bpt_p0=296, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=34, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660883321651200, .w_header_p0=1855492942081294336 }, - {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=2, .h=10, .w=8, .ci=24, .co=50, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=488, .w_bpt_p0=104, .x_bpt=16648, .x_bpt_p0=3336, .is_bias=0, .b_offset=58, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=11006807384898142208, .x_header_p0=1783435348043366400, .w_header=11007053838711521280, .w_header_p0=1783435348043366400 }, - {.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .ci=4000, .co=10, .w_kw2=1, .t=1, .p=200, .cm=20, .cm_p0=20, .w_bpt=488, .w_bpt_p0=488, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .b_offset=58, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=15, .ca_pl_scale=3, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 } + {.n=8, .l=3, .kw=11, .coe=2, .coe_tl=2, .r_ll=2, .h=18, .w=8, .ci=3, .co=16, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=272, .w_bpt_p0=272, .x_bpt=1256, .x_bpt_p0=1256, .is_bias=1, .conv2dense=0, .b_offset=0, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=414349857415757824, .x_header_p0=414349857415757824, .w_header=414596233919725568, .w_header_p0=414349857415757824 }, + {.n=8, .l=3, .kw=1, .coe=24, .coe_tl=0, .r_ll=2, .h=18, .w=8, .ci=16, .co=16, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=392, .w_bpt_p0=392, .x_bpt=19976, .x_bpt_p0=19976, .is_bias=0, .conv2dense=0, .b_offset=16, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=7, .ca_pl_scale=0, .x_header=8700973171777470464, .x_header_p0=8700973171777470464, .w_header=8701219591231111168, .w_header_p0=8700973171777470464 }, + {.n=8, .l=3, .kw=7, .coe=3, .coe_tl=4, .r_ll=2, .h=18, .w=8, .ci=16, .co=16, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=344, .w_bpt_p0=344, .x_bpt=2504, .x_bpt_p0=2504, .is_bias=1, .conv2dense=0, .b_offset=16, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=12, .ca_pl_scale=0, .x_header=846695421643325440, .x_header_p0=846695421643325440, .w_header=846941823917096960, .w_header_p0=846695421643325440 }, + {.n=8, .l=3, .kw=5, .coe=4, .coe_tl=4, .r_ll=2, .h=18, .w=8, .ci=16, .co=16, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=488, .w_bpt_p0=488, .x_bpt=5000, .x_bpt_p0=5000, .is_bias=0, .conv2dense=0, .b_offset=34, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=1927559332212244480, .x_header_p0=1927559332212244480, .w_header=1927805786025623552, .w_header_p0=1927559332212244480 }, + {.n=8, .l=3, .kw=3, .coe=8, .coe_tl=8, .r_ll=2, .h=18, .w=8, .ci=16, .co=24, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=440, .w_bpt_p0=296, .x_bpt=7496, .x_bpt_p0=5000, .is_bias=1, .conv2dense=0, .b_offset=34, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=3008423242781163520, .x_header_p0=1855501738174316544, .w_header=3008669679414673408, .w_header_p0=1855501738174316544 }, + {.n=8, .l=3, .kw=1, .coe=24, .coe_tl=2, .r_ll=2, .h=18, .w=8, .ci=24, .co=50, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=488, .w_bpt_p0=104, .x_bpt=24968, .x_bpt_p0=5000, .is_bias=0, .conv2dense=1, .b_offset=58, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=11006816180991164416, .x_header_p0=1783444144136388608, .w_header=11007062634804543488, .w_header_p0=1783444144136388608 }, + {.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .ci=7200, .co=10, .w_kw2=1, .t=1, .p=360, .cm=20, .cm_p0=20, .w_bpt=488, .w_bpt_p0=488, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .conv2dense=0, .b_offset=58, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=15, .ca_pl_scale=3, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 } }; #define X_BITS_L2 2 @@ -16,11 +16,11 @@ Bundle_t bundles [N_BUNDLES] = { #define PE_ROWS 8 #define PE_COLS 24 -#define WB_BYTES 134308 -#define W_BYTES 134144 -#define X_BYTES 2520 -#define X_BYTES_ALL 103480 -#define Y_BYTES 294920 +#define WB_BYTES 212388 +#define W_BYTES 212224 +#define X_BYTES 3768 +#define X_BYTES_ALL 163416 +#define Y_BYTES 442376 #define B_TYPE signed short #define B_WORDS 82 #define DATA_DIR "D:/dnn-engine/test/vectors" diff --git a/c/runtime.h b/c/runtime.h index ef1e52a..1fdb439 100644 --- a/c/runtime.h +++ b/c/runtime.h @@ -9,7 +9,7 @@ typedef struct { const int n, l, kw, coe, coe_tl, r_ll, h, w, ci, co, w_kw2, t, p, cm, cm_p0; const int w_bpt, w_bpt_p0, x_bpt, x_bpt_p0; // bytes per transfer - const char is_bias; + const char is_bias, conv2dense; const int b_offset, b_val_shift, b_bias_shift; const signed char ca_nzero, ca_shift, ca_pl_scale; const unsigned long long x_header, x_header_p0, w_header, w_header_p0; // 64 bits (at least) diff --git a/test/py/bundle.py b/test/py/bundle.py index 8e1fda7..51de063 100644 --- a/test/py/bundle.py +++ b/test/py/bundle.py @@ -420,7 +420,7 @@ def clog2(x): c = namedtuple('Compile', params)(**params) return c - def export (self, c): + def export (self, c, is_last): if self.core['type'] != 'conv': print('Conv -> Dense Reshape') @@ -458,7 +458,7 @@ def export (self, c): self.xe = self.reorder_x_q2e_conv(x_int, c, r) self.ye_exp = self.reorder_y_q2e_conv(y_int, c, r) self.o_int = o_int - self.oe_exp = self.reorder_y_q2e_conv(o_int, c, r) + self.oe_exp = o_int if is_last else self.reorder_y_q2e_conv(o_int, c, r) print(f"x reshape: [int]:{self.inp['int'].shape}, int:{x_int.shape}. xe:{self.xe[0].shape}") ''' diff --git a/test/py/param_test.py b/test/py/param_test.py index e8f61fa..b0d8113 100644 --- a/test/py/param_test.py +++ b/test/py/param_test.py @@ -189,7 +189,7 @@ class Config: def test_dnn_engine(COMPILE): c = make_compile_params(COMPILE) - input_shape = (8,10,8,3) # (XN, XH, XW, CI) + input_shape = (8,18,8,3) # (XN, XH, XW, CI) model_config = [ Config(11, 16, True , f'quantized_relu({c.X_BITS},0,negative_slope=0)'), Config(1 , 16, False, f'quantized_bits({c.X_BITS},0,False,False,1)'), @@ -240,10 +240,10 @@ def test_dnn_engine(COMPILE): ''' Export ''' - for b in bundles: + for ib, b in enumerate(bundles): print(f'-----------------{b.idx}-----------------------') b.process(inp if b.idx==0 else None, c) - b.export(c) + b.export(c, ib==len(bundles)-1) @@ -280,7 +280,7 @@ def test_dnn_engine(COMPILE): ca_nzero, ca_shift, ca_pl_scale = b.core['act']['non_zero'], b.core['act']['shift_bits'], b.core['act']['plog_slope'] - ch.write(f" {{.n={b.r.XN}, .l={b.r.L}, .kw={b.r.KW}, .coe={y_coe}, .coe_tl={y_coe_tl}, .r_ll={y_r_ll}, .h={b.r.XH}, .w={b.r.XW}, .ci={b.r.CI}, .co={b.r.CO}, .w_kw2={b.r.XW-b.r.KW//2}, .t={b.r.IT}, .p={b.r.CP}, .cm={b.r.CM}, .cm_p0={b.r.CM_0}, .w_bpt={w_bpt}, .w_bpt_p0={w_bpt_p0}, .x_bpt={x_bpt}, .x_bpt_p0={x_bpt_p0}, .is_bias={1*(b.b is not None)}, .b_offset={b_words}, .b_val_shift={b.bias_val_shift}, .b_bias_shift={b.bias_b_shift}, .ca_nzero={ca_nzero}, .ca_shift={ca_shift}, .ca_pl_scale={ca_pl_scale}, .x_header={b.r.x_header_be_p[-1][0]}, .x_header_p0={b.r.x_header_be_p[0][0]}, .w_header={b.r.w_header_be_p[-1][0]}, .w_header_p0={b.r.x_header_be_p[0][0]} }}") + ch.write(f" {{.n={b.r.XN}, .l={b.r.L}, .kw={b.r.KW}, .coe={y_coe}, .coe_tl={y_coe_tl}, .r_ll={y_r_ll}, .h={b.r.XH}, .w={b.r.XW}, .ci={b.r.CI}, .co={b.r.CO}, .w_kw2={b.r.XW-b.r.KW//2}, .t={b.r.IT}, .p={b.r.CP}, .cm={b.r.CM}, .cm_p0={b.r.CM_0}, .w_bpt={w_bpt}, .w_bpt_p0={w_bpt_p0}, .x_bpt={x_bpt}, .x_bpt_p0={x_bpt_p0}, .is_bias={1*(b.b is not None)}, .conv2dense={1*b.flatten}, .b_offset={b_words}, .b_val_shift={b.bias_val_shift}, .b_bias_shift={b.bias_b_shift}, .ca_nzero={ca_nzero}, .ca_shift={ca_shift}, .ca_pl_scale={ca_pl_scale}, .x_header={b.r.x_header_be_p[-1][0]}, .x_header_p0={b.r.x_header_be_p[0][0]}, .w_header={b.r.w_header_be_p[-1][0]}, .w_header_p0={b.r.x_header_be_p[0][0]} }}") b_words += b.be.size if b.b else 0 if b.idx != len(bundles)-1: diff --git a/test/py/tiling.ipynb b/test/py/tiling.ipynb index d21d724..c031a54 100644 --- a/test/py/tiling.ipynb +++ b/test/py/tiling.ipynb @@ -8,7 +8,7 @@ { "data": { "text/plain": [ - "(16384, 10240, 26624)" + "(192, 80, 80)" ] }, "execution_count": 1, @@ -20,20 +20,18 @@ "import numpy as np\n", "from collections import namedtuple\n", "\n", - "ib = 3\n", + "ib = 6\n", "ROWS = 8\n", "X_PAD = 5\n", "KH_MAX = 11\n", "text = '''{\n", - " {.n=8, .l=2, .kw=11, .coe=2, .coe_tl=2, .r_ll=2, .h=10, .w=8, .ci=3, .co=16, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=272, .w_bpt_p0=272, .x_bpt=840, .x_bpt_p0=840, .is_bias=1, .b_offset=0, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=414341061322735616, .x_header_p0=414341061322735616, .w_header=414587437826703360, .w_header_p0=414341061322735616 },\n", - " {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=0, .r_ll=2, .h=10, .w=8, .ci=16, .co=16, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=392, .w_bpt_p0=392, .x_bpt=13320, .x_bpt_p0=13320, .is_bias=0, .b_offset=16, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=7, .ca_pl_scale=0, .x_header=8700964375684448256, .x_header_p0=8700964375684448256, .w_header=8701210795138088960, .w_header_p0=8700964375684448256 },\n", - " {.n=8, .l=2, .kw=7, .coe=3, .coe_tl=4, .r_ll=2, .h=10, .w=8, .ci=16, .co=16, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=344, .w_bpt_p0=344, .x_bpt=1672, .x_bpt_p0=1672, .is_bias=1, .b_offset=16, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=12, .ca_pl_scale=0, .x_header=846686625550303232, .x_header_p0=846686625550303232, .w_header=846933027824074752, .w_header_p0=846686625550303232 },\n", - " \n", - " {.n=8, .l=2, .kw=5, .coe=4, .coe_tl=4, .r_ll=2, .h=10, .w=8, .ci=16, .co=16, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=488, .w_bpt_p0=488, .x_bpt=3336, .x_bpt_p0=3336, .is_bias=0, .b_offset=34, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=1927550536119222272, .x_header_p0=1927550536119222272, .w_header=1927796989932601344, .w_header_p0=1927550536119222272 },\n", - " \n", - " {.n=8, .l=2, .kw=3, .coe=8, .coe_tl=8, .r_ll=2, .h=10, .w=8, .ci=16, .co=24, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=440, .w_bpt_p0=296, .x_bpt=5000, .x_bpt_p0=3336, .is_bias=1, .b_offset=34, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=3008414446688141312, .x_header_p0=1855492942081294336, .w_header=3008660883321651200, .w_header_p0=1855492942081294336 },\n", - " {.n=8, .l=2, .kw=1, .coe=24, .coe_tl=2, .r_ll=2, .h=10, .w=8, .ci=24, .co=50, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=488, .w_bpt_p0=104, .x_bpt=16648, .x_bpt_p0=3336, .is_bias=0, .b_offset=58, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=11006807384898142208, .x_header_p0=1783435348043366400, .w_header=11007053838711521280, .w_header_p0=1783435348043366400 },\n", - " {.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .ci=4000, .co=10, .w_kw2=1, .t=1, .p=200, .cm=20, .cm_p0=20, .w_bpt=488, .w_bpt_p0=488, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .b_offset=58, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=15, .ca_pl_scale=3, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 }\n", + " {.n=8, .l=3, .kw=11, .coe=2, .coe_tl=2, .r_ll=2, .h=18, .w=8, .ci=3, .co=16, .w_kw2=3, .t=8, .p=3, .cm=1, .cm_p0=1, .w_bpt=272, .w_bpt_p0=272, .x_bpt=1256, .x_bpt_p0=1256, .is_bias=1, .conv2dense=0, .b_offset=0, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=414349857415757824, .x_header_p0=414349857415757824, .w_header=414596233919725568, .w_header_p0=414349857415757824 },\n", + " {.n=8, .l=3, .kw=1, .coe=24, .coe_tl=0, .r_ll=2, .h=18, .w=8, .ci=16, .co=16, .w_kw2=8, .t=1, .p=1, .cm=20, .cm_p0=16, .w_bpt=392, .w_bpt_p0=392, .x_bpt=19976, .x_bpt_p0=19976, .is_bias=0, .conv2dense=0, .b_offset=16, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=7, .ca_pl_scale=0, .x_header=8700973171777470464, .x_header_p0=8700973171777470464, .w_header=8701219591231111168, .w_header_p0=8700973171777470464 },\n", + " {.n=8, .l=3, .kw=7, .coe=3, .coe_tl=4, .r_ll=2, .h=18, .w=8, .ci=16, .co=16, .w_kw2=5, .t=6, .p=8, .cm=2, .cm_p0=2, .w_bpt=344, .w_bpt_p0=344, .x_bpt=2504, .x_bpt_p0=2504, .is_bias=1, .conv2dense=0, .b_offset=16, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=12, .ca_pl_scale=0, .x_header=846695421643325440, .x_header_p0=846695421643325440, .w_header=846941823917096960, .w_header_p0=846695421643325440 },\n", + " {.n=8, .l=3, .kw=5, .coe=4, .coe_tl=4, .r_ll=2, .h=18, .w=8, .ci=16, .co=16, .w_kw2=6, .t=4, .p=4, .cm=4, .cm_p0=4, .w_bpt=488, .w_bpt_p0=488, .x_bpt=5000, .x_bpt_p0=5000, .is_bias=0, .conv2dense=0, .b_offset=34, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=1927559332212244480, .x_header_p0=1927559332212244480, .w_header=1927805786025623552, .w_header_p0=1927559332212244480 },\n", + " {.n=8, .l=3, .kw=3, .coe=8, .coe_tl=8, .r_ll=2, .h=18, .w=8, .ci=16, .co=24, .w_kw2=7, .t=3, .p=3, .cm=6, .cm_p0=4, .w_bpt=440, .w_bpt_p0=296, .x_bpt=7496, .x_bpt_p0=5000, .is_bias=1, .conv2dense=0, .b_offset=34, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=0, .ca_shift=12, .ca_pl_scale=0, .x_header=3008423242781163520, .x_header_p0=1855501738174316544, .w_header=3008669679414673408, .w_header_p0=1855501738174316544 },\n", + " {.n=8, .l=3, .kw=1, .coe=24, .coe_tl=2, .r_ll=2, .h=18, .w=8, .ci=24, .co=50, .w_kw2=8, .t=3, .p=2, .cm=20, .cm_p0=4, .w_bpt=488, .w_bpt_p0=104, .x_bpt=24968, .x_bpt_p0=5000, .is_bias=0, .conv2dense=1, .b_offset=58, .b_val_shift=0, .b_bias_shift=0, .ca_nzero=1, .ca_shift=10, .ca_pl_scale=3, .x_header=11006816180991164416, .x_header_p0=1783444144136388608, .w_header=11007062634804543488, .w_header_p0=1783444144136388608 },\n", + " {.n=1, .l=1, .kw=1, .coe=24, .coe_tl=0, .r_ll=8, .h=8, .w=1, .ci=7200, .co=10, .w_kw2=1, .t=1, .p=360, .cm=20, .cm_p0=20, .w_bpt=488, .w_bpt_p0=488, .x_bpt=138, .x_bpt_p0=138, .is_bias=1, .conv2dense=0, .b_offset=58, .b_val_shift=5, .b_bias_shift=0, .ca_nzero=1, .ca_shift=15, .ca_pl_scale=3, .x_header=10952754293765046272, .x_header_p0=10952754293765046272, .w_header=10952754456973803520, .w_header_p0=10952754293765046272 }\n", "};\n", "'''\n", "\n", @@ -64,20 +62,20 @@ "b = bundles[ib]\n", "\n", "if ib == len(bundles)-1:\n", - " xe = yq\n", + " xe = np.copy(yq)\n", " bo = b\n", "else:\n", " xe = np.loadtxt(f\"D:/dnn-engine/test/vectors/{ib+1}_xe.txt\", dtype=np.int64)\n", " bo = bundles[ib+1]\n", - "\n", - "xe_arr = []\n", - "xe_copy = np.copy(xe)\n", - "for ixp in range(bo.p):\n", - " xcm = bo.cm_p0 if ixp==0 else bo.cm\n", - " size = (ROWS+X_PAD)*xcm*bo.w*bo.l*bo.n\n", - " xe_sub_arr = xe_copy[0:size].reshape(bo.n,bo.l,bo.w,xcm,ROWS+X_PAD)\n", - " xe_copy = xe_copy[size:]\n", - " xe_arr += [xe_sub_arr]\n", + " \n", + " xe_arr = []\n", + " xe_copy = np.copy(xe)\n", + " for ixp in range(bo.p):\n", + " xcm = bo.cm_p0 if ixp==0 else bo.cm\n", + " size = (ROWS+X_PAD)*xcm*bo.w*bo.l*bo.n\n", + " xe_sub_arr = xe_copy[0:size].reshape(bo.n,bo.l,bo.w,xcm,ROWS+X_PAD)\n", + " xe_copy = xe_copy[size:]\n", + " xe_arr += [xe_sub_arr]\n", "\n", "ye.size, yq.size, xe.size" ] @@ -90,8 +88,8 @@ { "data": { "text/plain": [ - "(C_Bundle(n=8, l=2, kw=5, coe=4, coe_tl=4, r_ll=2, h=10, w=8, ci=16, co=16, w_kw2=6, t=4, p=4, cm=4, cm_p0=4, w_bpt=488, w_bpt_p0=488, x_bpt=3336, x_bpt_p0=3336, is_bias=0, b_offset=34, b_val_shift=0, b_bias_shift=0, ca_nzero=1, ca_shift=10, ca_pl_scale=3, x_header=1927550536119222272, x_header_p0=1927550536119222272, w_header=1927796989932601344, w_header_p0=1927550536119222272),\n", - " C_Bundle(n=8, l=2, kw=3, coe=8, coe_tl=8, r_ll=2, h=10, w=8, ci=16, co=24, w_kw2=7, t=3, p=3, cm=6, cm_p0=4, w_bpt=440, w_bpt_p0=296, x_bpt=5000, x_bpt_p0=3336, is_bias=1, b_offset=34, b_val_shift=5, b_bias_shift=0, ca_nzero=0, ca_shift=12, ca_pl_scale=0, x_header=3008414446688141312, x_header_p0=1855492942081294336, w_header=3008660883321651200, w_header_p0=1855492942081294336))" + "(C_Bundle(n=1, l=1, kw=1, coe=24, coe_tl=0, r_ll=8, h=8, w=1, ci=7200, co=10, w_kw2=1, t=1, p=360, cm=20, cm_p0=20, w_bpt=488, w_bpt_p0=488, x_bpt=138, x_bpt_p0=138, is_bias=1, conv2dense=0, b_offset=58, b_val_shift=5, b_bias_shift=0, ca_nzero=1, ca_shift=15, ca_pl_scale=3, x_header=10952754293765046272, x_header_p0=10952754293765046272, w_header=10952754456973803520, w_header_p0=10952754293765046272),\n", + " C_Bundle(n=1, l=1, kw=1, coe=24, coe_tl=0, r_ll=8, h=8, w=1, ci=7200, co=10, w_kw2=1, t=1, p=360, cm=20, cm_p0=20, w_bpt=488, w_bpt_p0=488, x_bpt=138, x_bpt_p0=138, is_bias=1, conv2dense=0, b_offset=58, b_val_shift=5, b_bias_shift=0, ca_nzero=1, ca_shift=15, ca_pl_scale=3, x_header=10952754293765046272, x_header_p0=10952754293765046272, w_header=10952754456973803520, w_header_p0=10952754293765046272))" ] }, "execution_count": 2, @@ -146,56 +144,46 @@ "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "'''\n", - "Python Reshape: y_hwc -> x_engine (bo)\n", - "'''\n", + "if ib != len(bundles)-1:\n", + " '''\n", + " Python Reshape: y_hwc -> x_engine (bo)\n", + " '''\n", "\n", - "x1 = np.copy(yq).reshape(bo.n, bo.h, bo.w, bo.ci)\n", - "x1 = np.pad(x1, ((0,0),(0,ROWS*bo.l-bo.h),(0,0),(0,0))) # (XN, L*HL , XW, CI)\n", - "x1 = x1.reshape (bo.n, bo.l, ROWS, bo.w, bo.ci) # (XN, L, HL, XW, CI)\n", + " x1 = np.copy(yq).reshape(bo.n, bo.h, bo.w, bo.ci)\n", + " x1 = np.pad(x1, ((0,0),(0,ROWS*bo.l-bo.h),(0,0),(0,0))) # (XN, L*HL , XW, CI)\n", + " x1 = x1.reshape (bo.n, bo.l, ROWS, bo.w, bo.ci) # (XN, L, HL, XW, CI)\n", "\n", - "zeros = np.zeros((bo.n, bo.l, ROWS+X_PAD, bo.w, bo.ci),x1.dtype) # (XN,L,ROWS+X_PAD,XW,CI)\n", - "zeros[:,:,:ROWS,:,:] = x1\n", + " zeros = np.zeros((bo.n, bo.l, ROWS+X_PAD, bo.w, bo.ci),x1.dtype) # (XN,L,ROWS+X_PAD,XW,CI)\n", + " zeros[:,:,:ROWS,:,:] = x1\n", "\n", - "''' Fill bot rows from next '''\n", - "for l in range(bo.l):\n", - " if l == bo.l-1:\n", - " zeros[:,l, ROWS: ,:,:] = np.zeros((bo.n,X_PAD,bo.w,bo.ci),x1.dtype)\n", - " else:\n", - " zeros[:,l, ROWS: ,:,:] = x1[:,l+1,:X_PAD,:,:]\n", + " ''' Fill bot rows from next '''\n", + " for l in range(bo.l):\n", + " if l == bo.l-1:\n", + " zeros[:,l, ROWS: ,:,:] = np.zeros((bo.n,X_PAD,bo.w,bo.ci),x1.dtype)\n", + " else:\n", + " zeros[:,l, ROWS: ,:,:] = x1[:,l+1,:X_PAD,:,:]\n", "\n", - "x1 = zeros # (XN,L,ROWS+X_PAD,XW,CI)\n", - "x1 = x1.transpose(0,1,3,4,2) # (XN,L,XW,CI,ROWS+X_PAD)\n", - "x1 = x1.reshape((bo.n, bo.l, bo.w, bo.ci, (ROWS+X_PAD)))\n", + " x1 = zeros # (XN,L,ROWS+X_PAD,XW,CI)\n", + " x1 = x1.transpose(0,1,3,4,2) # (XN,L,XW,CI,ROWS+X_PAD)\n", + " x1 = x1.reshape((bo.n, bo.l, bo.w, bo.ci, (ROWS+X_PAD)))\n", "\n", - "x_list = []\n", - "ic_left = ic_right = 0\n", - "for ip in range(bo.p):\n", - " CM_p = bo.cm_p0 if ip==0 else bo.cm\n", - " ic_right += CM_p\n", + " x_list = []\n", + " ic_left = ic_right = 0\n", + " for ip in range(bo.p):\n", + " CM_p = bo.cm_p0 if ip==0 else bo.cm\n", + " ic_right += CM_p\n", "\n", - " xp = x1[:,:,:, ic_left:ic_right, :] #(XN, L, XW, CM, (ROWS+bo.x_pad))\n", - " assert xp.shape == (bo.n, bo.l, bo.w, CM_p, (ROWS+X_PAD))\n", - " x_list += [xp.flatten()]\n", + " xp = x1[:,:,:, ic_left:ic_right, :] #(XN, L, XW, CM, (ROWS+bo.x_pad))\n", + " assert xp.shape == (bo.n, bo.l, bo.w, CM_p, (ROWS+X_PAD))\n", + " x_list += [xp.flatten()]\n", "\n", - " ic_left = ic_right\n", + " ic_left = ic_right\n", "\n", - "x1 = np.concatenate(x_list)\n", + " x1 = np.concatenate(x_list)\n", "\n", - "np.sum(np.abs(x1 - xe))" + " np.sum(np.abs(x1 - xe))" ] }, { @@ -217,12 +205,12 @@ "source": [ "yq_exp = np.zeros((b.n, b.h, b.w, b.co), dtype=np.int64)\n", "ye_flat = ye.flatten()\n", - "xe_gen = np.zeros(xe.size, dtype=np.int64)\n", + "xe_gen = np.zeros(xe.size, dtype=np.int64) + int(1e6)\n", "\n", "def write_xe_gen(val, ixp, ixn, ixl, ixw, ixcm, ir, bo, X_CMP):\n", " \n", " exp_val = xe_arr[ixp][ixn,ixl,ixw,ixcm,ir]\n", - " assert val == exp_val, f\"{(val, ixp, ixn, ixl, ixw, ixcm, ir, X_CMP)=}\"\n", + " assert val == exp_val, f\"{val=}, {exp_val=} {ixp=}, {(ixn, ixl, ixw, ixcm, ir, X_CMP)=}\"\n", "\n", " pp_n2r = ixn * ( bo.l * bo.w * X_CMP * (ROWS+X_PAD)) \\\n", " + ixl * ( bo.w * X_CMP * (ROWS+X_PAD)) \\\n", @@ -245,8 +233,6 @@ " assert ixl < bo.l , f\"{ixl=} >= {bo.l=}\"\n", " assert ixn < bo.n , f\"{ixn=} >= {bo.n=}\"\n", " assert ixp < bo.p , f\"{ixp=} >= {bo.p=}\"\n", - "\n", - " assert pp < xe_gen.size, f\"{pp=} >= {xe_gen.size=}; {ir=}/{(ROWS+X_PAD)=}, {ixcm=}/{X_CMP=}, {ixw=}/{bo.w=}, {ixl=}/{bo.l=}, {ixn=}/{bo.n=}, {ixp=}/{bo.p=}; {(ROWS+X_PAD)*bo.w*bo.l*bo.n*(bo.cm_p0+(bo.p-1)*bo.cm)=}, {exp_val=}, {val=}\"\n", " return pp\n", "\n", "y_ptr = 0\n", @@ -279,43 +265,52 @@ " yq_exp[i_yn, i_yh, i_yw, i_yc] = val\n", " \n", " '''\n", + " If last bundle, write as NHWC\n", + " '''\n", + " if ib == len(bundles)-1:\n", + " pp = (b.h*b.w*b.co)* i_yn + (b.w*b.co)* i_yh + (b.co)* i_yw + i_yc\n", + " xe_gen[pp] = val\n", + " continue\n", + "\n", + " '''\n", " Calc x coordinates: [p, n, l, w,cmp, r+pad]\n", " '''\n", + " i_xn = i_yn if not b.conv2dense else 0 # N=1\n", + " i_xh = i_yh if not b.conv2dense else i_yn # N -> H\n", + " i_xw = i_yw if not b.conv2dense else 0 # W=1\n", + " i_xc = i_yc if not b.conv2dense else (b.w*b.co)* i_yh + (b.co)* i_yw + i_yc # (H*W*C) -> C\n", "\n", - " i_xn = i_n\n", - " i_xw = i_yw\n", - " i_xh = i_yh\n", " i_xr = i_xh % ROWS\n", " i_xl = i_xh // ROWS\n", "\n", - " if i_yc < bo.cm_p0:\n", + " if i_xc < bo.cm_p0:\n", " i_xp = 0\n", - " i_xcm = i_yc\n", + " i_xcm = i_xc\n", " X_CMP = bo.cm_p0\n", " else:\n", - " i_xp = (i_yc - bo.cm_p0) // bo.cm + 1\n", - " i_xcm = (i_yc - bo.cm_p0) % bo.cm\n", + " i_xp = (i_xc - bo.cm_p0) // bo.cm + 1\n", + " i_xcm = (i_xc - bo.cm_p0) % bo.cm\n", " X_CMP = bo.cm\n", "\n", + "\n", " ''' Write Val '''\n", " write_xe_gen(val, i_xp, i_xn, i_xl, i_xw, i_xcm, i_xr, bo, X_CMP)\n", "\n", - "\n", " ''' Padding the [bottom X_PAD rows of previous block (l-1)] with [first X_PAD rows of this block (l)]'''\n", - " if i_xr < X_PAD:\n", - " if i_xl == 0:\n", - " write_xe_gen(0, i_xp, i_xn, bo.l-1, i_xw, i_xcm, i_xr+ROWS, bo, X_CMP)\n", - " # print(xp, xe_gen[xp], 'pad zero')\n", - " else:\n", - " write_xe_gen(val, i_xp, i_xn, i_xl-1, i_xw, i_xcm, i_xr+ROWS, bo, X_CMP)\n", - " # print(xp, xe_gen[xp], 'pad val')\n", - " \n", - " # if (i_l == bo.l-1) and (i_xr == bo.r_ll-1):\n", - " # '''Last row of last block in y, but i_xr is not complete (need zero padding for block)'''\n", - " # write_xe_gen(0,i_xp, i_xn, bo.l-1, i_xw, i_xcm, i_xr+ROWS, bo, X_CMP)\n", - " # i_xr += 1\n", - " # else:\n", - " # break\n", + " if i_xr < X_PAD: \n", + " pad_val = 0 if (i_xl == 0) else val\n", + " dest_xl = bo.l-1 if (i_xl == 0) else i_xl-1\n", + " write_xe_gen(pad_val, i_xp, i_xn, dest_xl, i_xw, i_xcm, i_xr+ROWS, bo, X_CMP)\n", + " \n", + " ''' Pad L*ROWS-H rows with zeros, and pad their other blocks accordingly'''\n", + " if (i_xl == bo.l-1) and (i_xr == bo.r_ll-1):\n", + " for ir_hpad in range(bo.r_ll, ROWS):\n", + " write_xe_gen(0, i_xp, i_xn, i_xl, i_xw, i_xcm, ir_hpad, bo, X_CMP)\n", + "\n", + " if ir_hpad < X_PAD: \n", + " dest_xl = bo.l-1 if (i_xl == 0) else i_xl-1\n", + " write_xe_gen(0, i_xp, i_xn, dest_xl, i_xw, i_xcm, ir_hpad+ROWS, bo, X_CMP)\n", + " \n", "\n", " \n", "\n",