diff --git a/README.md b/README.md
index b79a66e..abed7af 100644
--- a/README.md
+++ b/README.md
@@ -143,7 +143,7 @@
 The low-level implementations of kernels are under `nn/impl/`.

 For the low-level functions, the following naming convention is used:

-`void NN_operator_datatype(size_t n, , );`
+`void nn_operator_datatype(size_t n, , );`

 `operator`: the name of the operator, such as `add`, `max`.
diff --git a/docs/Tensor-Basics.md b/docs/Tensor-Basics.md
index 7aaaf46..cfd1117 100644
--- a/docs/Tensor-Basics.md
+++ b/docs/Tensor-Basics.md
@@ -9,9 +9,9 @@ The underlying fundamental operators will be statically typed, and hence the ten
 Tensor attributes describe their dimension, shape, number of elements, and datatype.

 ```c
-Tensor *tensor = NN_rand(2, (size_t []){ 3, 4 }, DTYPE_F32);
+Tensor *tensor = nn_rand(2, (size_t []){ 3, 4 }, DTYPE_F32);

-printf("Datatype of tensor: %s\n", NN_get_datatype_name(tensor->dtype));
+printf("Datatype of tensor: %s\n", nn_get_datatype_name(tensor->dtype));
 printf("Dimension of tensor: %d\n", tensor->ndim);
 printf("Shape of tensor: (%d, %d)\n", tensor->shape[0], tensor->shape[1]);
 printf("Number of elements: %d\n", tensor->size);
diff --git a/docs/Tensor-Creation.md b/docs/Tensor-Creation.md
index 8041389..ff7b9f3 100644
--- a/docs/Tensor-Creation.md
+++ b/docs/Tensor-Creation.md
@@ -14,14 +14,14 @@ Tensor *NN_(, , , )
 The following factory functions are available at the time of this writing:

-#### NN_tensor()
+#### nn_tensor()

 Returns a tensor with uninitialized values or a preallocated buffer.

 When passing NULL as the data buffer, the method will allocate a new chunk of uninitialized data.

 ```c
-Tensor *tensor = NN_tensor(2, (size_t []){ 2, 2 }, DTYPE_F32, NULL);
+Tensor *tensor = nn_tensor(2, (size_t []){ 2, 2 }, DTYPE_F32, NULL);
 ```

 Alternatively, a tensor can be created directly from an existing data buffer.
@@ -29,30 +29,30 @@ Alternatively, a tensor can be created directly from an existing data buffer.
 ```c
 // data = [[1, 2], [3, 4]]
 float data[] = { 1, 2, 3, 4 };
-Tensor *tensor = NN_tensor(2, (size_t []){ 2, 2 }, DTYPE_F32, data);
+Tensor *tensor = nn_tensor(2, (size_t []){ 2, 2 }, DTYPE_F32, data);
 ```

-#### NN_zeros()
+#### nn_zeros()

 Returns a tensor filled with all zeros.

-#### NN_ones()
+#### nn_ones()

 Returns a tensor filled with all ones.

-#### NN_full()
+#### nn_full()

 Returns a tensor filled with a single value.

-#### NN_rand()
+#### nn_rand()

 Returns a tensor filled with values drawn from a uniform distribution on [0, 1).

-#### NN_randint()
+#### nn_randint()

 Returns a tensor with integers randomly drawn from an interval.

-#### NN_arange()
+#### nn_arange()

 Returns a tensor with a sequence of integers.
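The factory functions listed above follow the `(ndim, shape, dtype)` calling pattern shown for `nn_tensor()` and `nn_rand()`. A minimal usage sketch: the extra fill-value argument of `nn_full()` below is an assumption for illustration only, not a confirmed signature; check the headers for the exact parameter lists.

```c
// Sketch only: nn_zeros()/nn_ones() are assumed to mirror nn_rand()'s
// (ndim, shape, dtype) signature; the fill-value argument of nn_full()
// is an assumption and may differ in the actual API.
Tensor *z = nn_zeros(2, (size_t []){ 2, 3 }, DTYPE_F32);       // 2x3, all 0.0
Tensor *o = nn_ones(1, (size_t []){ 4 }, DTYPE_F32);           // length-4, all 1.0
Tensor *f = nn_full(2, (size_t []){ 2, 2 }, DTYPE_F32, 7.0f);  // 2x2, all 7.0 (assumed signature)
Tensor *r = nn_rand(2, (size_t []){ 3, 4 }, DTYPE_F32);        // 3x4, uniform on [0, 1)
```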
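For the `nn_operator_datatype` naming convention in the README section above, here is an illustrative sketch of what a low-level kernel under `nn/impl/` could look like. The operand list is an assumption (the prototype in the README has its operand placeholders elided), so this shows only the naming pattern, not the library's actual signature.

```c
#include <stddef.h>

// Hypothetical example of the nn_<operator>_<datatype> convention:
// operator = add, datatype = f32, n = number of contiguous elements.
// The real kernels in nn/impl/ may take different operand pointers.
void nn_add_f32(size_t n, float *y, const float *x1, const float *x2) {
  for (size_t i = 0; i < n; i += 1) {
    y[i] = x1[i] + x2[i];
  }
}
```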
diff --git a/examples/char-rnn/runtime_test_c/char-rnn.c b/examples/char-rnn/runtime_test_c/char-rnn.c index 309a977..1ada30a 100644 --- a/examples/char-rnn/runtime_test_c/char-rnn.c +++ b/examples/char-rnn/runtime_test_c/char-rnn.c @@ -40,13 +40,13 @@ int main() { printf("\n\n"); Matrix output; - NN_initMatrix(&output, 1, output_size); + nn_initMatrix(&output, 1, output_size); Matrix input; - NN_initMatrix(&input, 1, input_size + hidden_size); + nn_initMatrix(&input, 1, input_size + hidden_size); Matrix hidden; - NN_initMatrix(&hidden, 1, hidden_size); + nn_initMatrix(&hidden, 1, hidden_size); int index; @@ -57,17 +57,17 @@ int main() { for (int j=1; j %s\n", str); printf("score: ("); - NN_print_f32(output.data[index], 2); + nn_print_f32(output.data[index], 2); printf("), predicted: (%d, %s)\n", index, categories[index]); } diff --git a/examples/char-rnn/runtime_test_c/model.h b/examples/char-rnn/runtime_test_c/model.h index b89c5e9..2a069d9 100644 --- a/examples/char-rnn/runtime_test_c/model.h +++ b/examples/char-rnn/runtime_test_c/model.h @@ -35,11 +35,11 @@ static void forward(Matrix *output, Matrix *hidden, Matrix *input) { // Input Matrix *input_out = input; // Linear - NN_linear(hidden, &i2h_weight_transposed, &i2h_bias, input_out); + nn_linear(hidden, &i2h_weight_transposed, &i2h_bias, input_out); // Linear - NN_linear(output, &h2o_weight_transposed, &h2o_bias, hidden); + nn_linear(output, &h2o_weight_transposed, &h2o_bias, hidden); // Log Softmax - NN_logSoftmax(output, output); + nn_logSoftmax(output, output); } #endif // __MODEL_H diff --git a/examples/char-rnn/runtime_test_c/nn.h b/examples/char-rnn/runtime_test_c/nn.h index b6fdc01..f2a939f 100644 --- a/examples/char-rnn/runtime_test_c/nn.h +++ b/examples/char-rnn/runtime_test_c/nn.h @@ -17,7 +17,7 @@ typedef struct { * ====== Utility Functions ====== */ -void NN_assert(int condition, char *message) { +void nn_assert(int condition, char *message) { if (!condition) { printf("Assertion failed: "); printf("%s\n", message); @@ -31,7 +31,7 @@ void NN_assert(int condition, char *message) { * These functions assumes that printf is available. 
*/ -void NN_print_f32(float v, int16_t num_digits) { +void nn_print_f32(float v, int16_t num_digits) { int32_t scale = 1; int32_t integer_part, fractional_part; while (num_digits != 0) { @@ -46,14 +46,14 @@ void NN_print_f32(float v, int16_t num_digits) { printf("%i.%i", integer_part, fractional_part); } -void NN_print_shape(Matrix *a) { +void nn_print_shape(Matrix *a) { printf("(%d, %d)\n", a->rows, a->cols); } -void NN_printMatrix(Matrix *a) { +void nn_printMatrix(Matrix *a) { for (size_t i = 0; i < a->rows; i++) { for (size_t j = 0; j < a->cols; j++) { - NN_print_f32(a->data[i * a->cols + j], 2); + nn_print_f32(a->data[i * a->cols + j], 2); printf(" "); } printf("\n"); @@ -64,16 +64,16 @@ void NN_printMatrix(Matrix *a) { /* * ====== Math Functions ====== */ -void NN_initMatrix(Matrix *m, size_t rows, size_t cols) { +void nn_initMatrix(Matrix *m, size_t rows, size_t cols) { m->rows = rows; m->cols = cols; m->data = malloc(rows * cols * sizeof(float)); } -void NN_matmul(Matrix *out, Matrix *a, Matrix *b) { - NN_assert(a->cols == b->rows, "matmul: dimension mismatch"); - NN_assert(out->rows == a->rows, "matmul: dimension mismatch"); - NN_assert(out->cols == b->cols, "matmul: dimension mismatch"); +void nn_matmul(Matrix *out, Matrix *a, Matrix *b) { + nn_assert(a->cols == b->rows, "matmul: dimension mismatch"); + nn_assert(out->rows == a->rows, "matmul: dimension mismatch"); + nn_assert(out->cols == b->cols, "matmul: dimension mismatch"); for (size_t i = 0; i < a->rows; i += 1) { for (size_t j = 0; j < b->cols; j += 1) { float sum = 0; @@ -85,9 +85,9 @@ void NN_matmul(Matrix *out, Matrix *a, Matrix *b) { } } -void NN_matadd(Matrix *out, Matrix *a, Matrix *b) { - NN_assert(a->rows == b->rows, "matadd: dimension mismatch"); - NN_assert(a->cols == b->cols, "matadd: dimension mismatch"); +void nn_matadd(Matrix *out, Matrix *a, Matrix *b) { + nn_assert(a->rows == b->rows, "matadd: dimension mismatch"); + nn_assert(a->cols == b->cols, "matadd: dimension mismatch"); for (size_t i = 0; i < a->rows; i += 1) { for (size_t j = 0; j < a->cols; j += 1) { out->data[i * out->cols + j] = a->data[i * a->cols + j] + b->data[i * b->cols + j]; @@ -95,7 +95,7 @@ void NN_matadd(Matrix *out, Matrix *a, Matrix *b) { } } -void NN_transpose(Matrix *out, Matrix *a) { +void nn_transpose(Matrix *out, Matrix *a) { for (size_t i = 0; i < a->rows; i += 1) { for (size_t j = 0; j < a->cols; j += 1) { out->data[j * out->cols + i] = a->data[i * a->cols + j]; @@ -103,7 +103,7 @@ void NN_transpose(Matrix *out, Matrix *a) { } } -void NN_concatenate(Matrix *out, Matrix *a, Matrix *b) { +void nn_concatenate(Matrix *out, Matrix *a, Matrix *b) { for (size_t i = 0; i < a->cols; i += 1) { out->data[i] = a->data[i]; } @@ -112,7 +112,7 @@ void NN_concatenate(Matrix *out, Matrix *a, Matrix *b) { } } -size_t NN_argmax(Matrix *a) { +size_t nn_argmax(Matrix *a) { int max_index = 0; float max_value = a->data[0]; for (size_t i = 1; i < a->cols; i += 1) { @@ -128,12 +128,12 @@ size_t NN_argmax(Matrix *a) { * ====== Operators ====== */ -void NN_linear(Matrix *out, Matrix *weight, Matrix *bias, Matrix *input) { - NN_matmul(out, input, weight); - NN_matadd(out, out, bias); +void nn_linear(Matrix *out, Matrix *weight, Matrix *bias, Matrix *input) { + nn_matmul(out, input, weight); + nn_matadd(out, out, bias); } -void NN_logSoftmax(Matrix *out, Matrix *a) { +void nn_logSoftmax(Matrix *out, Matrix *a) { float sum = 0; for (size_t i = 0; i < a->cols; i += 1) { sum += exp(a->data[i]); diff --git a/examples/char-rnn/runtime_test_np/model.py 
b/examples/char-rnn/runtime_test_np/model.py index 607fc36..18e3a89 100644 --- a/examples/char-rnn/runtime_test_np/model.py +++ b/examples/char-rnn/runtime_test_np/model.py @@ -8,9 +8,9 @@ def forward(input): # Input input_out = input # Linear - i2h_out = NN_linear(input_out, i2h_weight_transposed, i2h_bias) + i2h_out = nn_linear(input_out, i2h_weight_transposed, i2h_bias) # Linear - h2o_out = NN_linear(i2h_out, h2o_weight_transposed, h2o_bias) + h2o_out = nn_linear(i2h_out, h2o_weight_transposed, h2o_bias) # Log Softmax softmax_out = nn_logsoftmax(h2o_out) return softmax_out, i2h_out diff --git a/examples/char-rnn/runtime_test_np/nn.py b/examples/char-rnn/runtime_test_np/nn.py index 1caa9b6..c09f2ca 100644 --- a/examples/char-rnn/runtime_test_np/nn.py +++ b/examples/char-rnn/runtime_test_np/nn.py @@ -1,6 +1,6 @@ import numpy as np -def NN_linear(input, weight_T, bias): +def nn_linear(input, weight_T, bias): return np.matmul(input, weight_T) + bias def nn_logsoftmax(input): diff --git a/examples/diffuse-loco/main.c b/examples/diffuse-loco/main.c index 33b9e1b..6ac4ecd 100644 --- a/examples/diffuse-loco/main.c +++ b/examples/diffuse-loco/main.c @@ -33,7 +33,7 @@ int main() { init(model); printf("setting input data...\n"); - NN_fill(&model->input_1, 1.0); + nn_fill(&model->input_1, 1.0); // cycles = READ_CSR("mcycle"); forward(model); @@ -44,7 +44,7 @@ int main() { // output tensor([[ 0.0258, -0.0050, 0.0902, -0.0022, -0.0924, -0.0574, 0.0328, 0.0386, -0.0277, 0.0788, 0.0603, -0.0085]]) printf("output:\n"); - NN_printf(&model->actor_6); + nn_printf(&model->actor_6); return 0; } diff --git a/examples/diffuse-loco/model.h b/examples/diffuse-loco/model.h index 7e8021b..c8ca372 100644 --- a/examples/diffuse-loco/model.h +++ b/examples/diffuse-loco/model.h @@ -41,44 +41,44 @@ void forward(Model *model); void init(Model *model) { float *array_pointer = (float *)model_weight_data; - NN_init_tensor(&model->input_1, 2, (size_t[]){1, 48}, DTYPE_F32, NULL); + nn_init_tensor(&model->input_1, 2, (size_t[]){1, 48}, DTYPE_F32, NULL); // : actor_0 - NN_init_tensor(&model->actor_0_weight, 2, (size_t[]){512, 48}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->actor_0_weight, 2, (size_t[]){512, 48}, DTYPE_F32, array_pointer); array_pointer += 24576; - NN_init_tensor(&model->actor_0_bias, 1, (size_t[]){512}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->actor_0_bias, 1, (size_t[]){512}, DTYPE_F32, array_pointer); array_pointer += 512; - NN_init_tensor(&model->actor_0, 2, (size_t[]){1, 512}, DTYPE_F32, NULL); + nn_init_tensor(&model->actor_0, 2, (size_t[]){1, 512}, DTYPE_F32, NULL); // : actor_1 - NN_init_tensor(&model->actor_1, 2, (size_t[]){1, 512}, DTYPE_F32, NULL); + nn_init_tensor(&model->actor_1, 2, (size_t[]){1, 512}, DTYPE_F32, NULL); // : actor_2 - NN_init_tensor(&model->actor_2_weight, 2, (size_t[]){256, 512}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->actor_2_weight, 2, (size_t[]){256, 512}, DTYPE_F32, array_pointer); array_pointer += 131072; - NN_init_tensor(&model->actor_2_bias, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->actor_2_bias, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->actor_2, 2, (size_t[]){1, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->actor_2, 2, (size_t[]){1, 256}, DTYPE_F32, NULL); // : actor_3 - NN_init_tensor(&model->actor_3, 2, (size_t[]){1, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->actor_3, 2, (size_t[]){1, 256}, DTYPE_F32, NULL); // : actor_4 - 
NN_init_tensor(&model->actor_4_weight, 2, (size_t[]){128, 256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->actor_4_weight, 2, (size_t[]){128, 256}, DTYPE_F32, array_pointer); array_pointer += 32768; - NN_init_tensor(&model->actor_4_bias, 1, (size_t[]){128}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->actor_4_bias, 1, (size_t[]){128}, DTYPE_F32, array_pointer); array_pointer += 128; - NN_init_tensor(&model->actor_4, 2, (size_t[]){1, 128}, DTYPE_F32, NULL); + nn_init_tensor(&model->actor_4, 2, (size_t[]){1, 128}, DTYPE_F32, NULL); // : actor_5 - NN_init_tensor(&model->actor_5, 2, (size_t[]){1, 128}, DTYPE_F32, NULL); + nn_init_tensor(&model->actor_5, 2, (size_t[]){1, 128}, DTYPE_F32, NULL); // : actor_6 - NN_init_tensor(&model->actor_6_weight, 2, (size_t[]){12, 128}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->actor_6_weight, 2, (size_t[]){12, 128}, DTYPE_F32, array_pointer); array_pointer += 1536; - NN_init_tensor(&model->actor_6_bias, 1, (size_t[]){12}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->actor_6_bias, 1, (size_t[]){12}, DTYPE_F32, array_pointer); array_pointer += 12; - NN_init_tensor(&model->actor_6, 2, (size_t[]){1, 12}, DTYPE_F32, NULL); + nn_init_tensor(&model->actor_6, 2, (size_t[]){1, 12}, DTYPE_F32, NULL); } @@ -87,13 +87,13 @@ void init(Model *model) { * Forward pass of the model */ void forward(Model *model) { - NN_linear(&model->actor_0, &model->input_1, &model->actor_0_weight, &model->actor_0_bias); - NN_elu(&model->actor_1, &model->actor_0, 1.0); - NN_linear(&model->actor_2, &model->actor_1, &model->actor_2_weight, &model->actor_2_bias); - NN_elu(&model->actor_3, &model->actor_2, 1.0); - NN_linear(&model->actor_4, &model->actor_3, &model->actor_4_weight, &model->actor_4_bias); - NN_elu(&model->actor_5, &model->actor_4, 1.0); - NN_linear(&model->actor_6, &model->actor_5, &model->actor_6_weight, &model->actor_6_bias); + nn_linear(&model->actor_0, &model->input_1, &model->actor_0_weight, &model->actor_0_bias); + nn_elu(&model->actor_1, &model->actor_0, 1.0); + nn_linear(&model->actor_2, &model->actor_1, &model->actor_2_weight, &model->actor_2_bias); + nn_elu(&model->actor_3, &model->actor_2, 1.0); + nn_linear(&model->actor_4, &model->actor_3, &model->actor_4_weight, &model->actor_4_bias); + nn_elu(&model->actor_5, &model->actor_4, 1.0); + nn_linear(&model->actor_6, &model->actor_5, &model->actor_6_weight, &model->actor_6_bias); } diff --git a/examples/fast-depth/main.c b/examples/fast-depth/main.c index f7e90f1..e4a74d1 100644 --- a/examples/fast-depth/main.c +++ b/examples/fast-depth/main.c @@ -46,7 +46,7 @@ int main() { init(model); printf("setting input data...\n"); - // NN_fill(&model->x, 0.0); + // nn_fill(&model->x, 0.0); memcpy((uint8_t *)model->x.data, (uint8_t *)model_input_data, (size_t)model_input_end - (size_t)model_input_start); // cycles = READ_CSR("mcycle"); @@ -55,9 +55,9 @@ int main() { printf("cycles: %lu\n", cycles); - Tensor *img = NN_tensor(4, (const size_t[]){1, model->decode_conv6_2.shape[1] / 8, model->decode_conv6_2.shape[2] / 4, 1}, DTYPE_F32, NULL); + Tensor *img = nn_tensor(4, (const size_t[]){1, model->decode_conv6_2.shape[1] / 8, model->decode_conv6_2.shape[2] / 4, 1}, DTYPE_F32, NULL); - NN_interpolate(img, &model->decode_conv6_2, (float []){0.125, 0.25}); + nn_interpolate(img, &model->decode_conv6_2, (float []){0.125, 0.25}); printf("output:\n"); show_ASCII_image(img, 0, 0); diff --git a/examples/fast-depth/model.h b/examples/fast-depth/model.h index 23a083a..e6c56d7 100644 --- a/examples/fast-depth/model.h 
+++ b/examples/fast-depth/model.h @@ -338,737 +338,737 @@ void forward(Model *model); void init(Model *model) { float *array_pointer = (float *)model_weight_data; - NN_init_tensor(&model->x, 4, (size_t[]){1, 224, 224, 3}, DTYPE_F32, NULL); + nn_init_tensor(&model->x, 4, (size_t[]){1, 224, 224, 3}, DTYPE_F32, NULL); // : conv0_0 - NN_init_tensor(&model->conv0_0_weight, 4, (size_t[]){3, 3, 3, 16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv0_0_weight, 4, (size_t[]){3, 3, 3, 16}, DTYPE_F32, array_pointer); array_pointer += 432; - NN_init_tensor(&model->conv0_0, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv0_0, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); // : conv0_1 - NN_init_tensor(&model->conv0_1_weight, 1, (size_t[]){16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv0_1_weight, 1, (size_t[]){16}, DTYPE_F32, array_pointer); array_pointer += 16; - NN_init_tensor(&model->conv0_1_bias, 1, (size_t[]){16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv0_1_bias, 1, (size_t[]){16}, DTYPE_F32, array_pointer); array_pointer += 16; - NN_init_tensor(&model->conv0_1_running_mean, 1, (size_t[]){16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv0_1_running_mean, 1, (size_t[]){16}, DTYPE_F32, array_pointer); array_pointer += 16; - NN_init_tensor(&model->conv0_1_running_var, 1, (size_t[]){16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv0_1_running_var, 1, (size_t[]){16}, DTYPE_F32, array_pointer); array_pointer += 16; - NN_init_tensor(&model->conv0_1, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv0_1, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); // : conv0_2 - NN_init_tensor(&model->conv0_2, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv0_2, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); // : conv1_0 - NN_init_tensor(&model->conv1_0_weight, 4, (size_t[]){3, 3, 1, 16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv1_0_weight, 4, (size_t[]){3, 3, 1, 16}, DTYPE_F32, array_pointer); array_pointer += 144; - NN_init_tensor(&model->conv1_0, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv1_0, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); // : conv1_1 - NN_init_tensor(&model->conv1_1_weight, 1, (size_t[]){16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv1_1_weight, 1, (size_t[]){16}, DTYPE_F32, array_pointer); array_pointer += 16; - NN_init_tensor(&model->conv1_1_bias, 1, (size_t[]){16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv1_1_bias, 1, (size_t[]){16}, DTYPE_F32, array_pointer); array_pointer += 16; - NN_init_tensor(&model->conv1_1_running_mean, 1, (size_t[]){16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv1_1_running_mean, 1, (size_t[]){16}, DTYPE_F32, array_pointer); array_pointer += 16; - NN_init_tensor(&model->conv1_1_running_var, 1, (size_t[]){16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv1_1_running_var, 1, (size_t[]){16}, DTYPE_F32, array_pointer); array_pointer += 16; - NN_init_tensor(&model->conv1_1, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv1_1, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); // : conv1_2 - NN_init_tensor(&model->conv1_2, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv1_2, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); // : conv1_3 - NN_init_tensor(&model->conv1_3_weight, 4, (size_t[]){1, 1, 16, 56}, DTYPE_F32, array_pointer); + 
nn_init_tensor(&model->conv1_3_weight, 4, (size_t[]){1, 1, 16, 56}, DTYPE_F32, array_pointer); array_pointer += 896; - NN_init_tensor(&model->conv1_3, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv1_3, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); // : conv1_4 - NN_init_tensor(&model->conv1_4_weight, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv1_4_weight, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->conv1_4_bias, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv1_4_bias, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->conv1_4_running_mean, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv1_4_running_mean, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->conv1_4_running_var, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv1_4_running_var, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->conv1_4, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv1_4, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); // : conv1_5 - NN_init_tensor(&model->conv1_5, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv1_5, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); // : conv2_0 - NN_init_tensor(&model->conv2_0_weight, 4, (size_t[]){3, 3, 1, 56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv2_0_weight, 4, (size_t[]){3, 3, 1, 56}, DTYPE_F32, array_pointer); array_pointer += 504; - NN_init_tensor(&model->conv2_0, 4, (size_t[]){1, 56, 56, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv2_0, 4, (size_t[]){1, 56, 56, 56}, DTYPE_F32, NULL); // : conv2_1 - NN_init_tensor(&model->conv2_1_weight, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv2_1_weight, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->conv2_1_bias, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv2_1_bias, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->conv2_1_running_mean, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv2_1_running_mean, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->conv2_1_running_var, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv2_1_running_var, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->conv2_1, 4, (size_t[]){1, 56, 56, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv2_1, 4, (size_t[]){1, 56, 56, 56}, DTYPE_F32, NULL); // : conv2_2 - NN_init_tensor(&model->conv2_2, 4, (size_t[]){1, 56, 56, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv2_2, 4, (size_t[]){1, 56, 56, 56}, DTYPE_F32, NULL); // : conv2_3 - NN_init_tensor(&model->conv2_3_weight, 4, (size_t[]){1, 1, 56, 88}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv2_3_weight, 4, (size_t[]){1, 1, 56, 88}, DTYPE_F32, array_pointer); array_pointer += 4928; - NN_init_tensor(&model->conv2_3, 4, (size_t[]){1, 56, 56, 88}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv2_3, 4, (size_t[]){1, 56, 56, 88}, DTYPE_F32, NULL); // : conv2_4 - NN_init_tensor(&model->conv2_4_weight, 1, (size_t[]){88}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv2_4_weight, 1, (size_t[]){88}, DTYPE_F32, 
array_pointer); array_pointer += 88; - NN_init_tensor(&model->conv2_4_bias, 1, (size_t[]){88}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv2_4_bias, 1, (size_t[]){88}, DTYPE_F32, array_pointer); array_pointer += 88; - NN_init_tensor(&model->conv2_4_running_mean, 1, (size_t[]){88}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv2_4_running_mean, 1, (size_t[]){88}, DTYPE_F32, array_pointer); array_pointer += 88; - NN_init_tensor(&model->conv2_4_running_var, 1, (size_t[]){88}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv2_4_running_var, 1, (size_t[]){88}, DTYPE_F32, array_pointer); array_pointer += 88; - NN_init_tensor(&model->conv2_4, 4, (size_t[]){1, 56, 56, 88}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv2_4, 4, (size_t[]){1, 56, 56, 88}, DTYPE_F32, NULL); // : conv2_5 - NN_init_tensor(&model->conv2_5, 4, (size_t[]){1, 56, 56, 88}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv2_5, 4, (size_t[]){1, 56, 56, 88}, DTYPE_F32, NULL); // : conv3_0 - NN_init_tensor(&model->conv3_0_weight, 4, (size_t[]){3, 3, 1, 88}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv3_0_weight, 4, (size_t[]){3, 3, 1, 88}, DTYPE_F32, array_pointer); array_pointer += 792; - NN_init_tensor(&model->conv3_0, 4, (size_t[]){1, 56, 56, 88}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv3_0, 4, (size_t[]){1, 56, 56, 88}, DTYPE_F32, NULL); // : conv3_1 - NN_init_tensor(&model->conv3_1_weight, 1, (size_t[]){88}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv3_1_weight, 1, (size_t[]){88}, DTYPE_F32, array_pointer); array_pointer += 88; - NN_init_tensor(&model->conv3_1_bias, 1, (size_t[]){88}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv3_1_bias, 1, (size_t[]){88}, DTYPE_F32, array_pointer); array_pointer += 88; - NN_init_tensor(&model->conv3_1_running_mean, 1, (size_t[]){88}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv3_1_running_mean, 1, (size_t[]){88}, DTYPE_F32, array_pointer); array_pointer += 88; - NN_init_tensor(&model->conv3_1_running_var, 1, (size_t[]){88}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv3_1_running_var, 1, (size_t[]){88}, DTYPE_F32, array_pointer); array_pointer += 88; - NN_init_tensor(&model->conv3_1, 4, (size_t[]){1, 56, 56, 88}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv3_1, 4, (size_t[]){1, 56, 56, 88}, DTYPE_F32, NULL); // : conv3_2 - NN_init_tensor(&model->conv3_2, 4, (size_t[]){1, 56, 56, 88}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv3_2, 4, (size_t[]){1, 56, 56, 88}, DTYPE_F32, NULL); // : conv3_3 - NN_init_tensor(&model->conv3_3_weight, 4, (size_t[]){1, 1, 88, 120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv3_3_weight, 4, (size_t[]){1, 1, 88, 120}, DTYPE_F32, array_pointer); array_pointer += 10560; - NN_init_tensor(&model->conv3_3, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv3_3, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); // : conv3_4 - NN_init_tensor(&model->conv3_4_weight, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv3_4_weight, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->conv3_4_bias, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv3_4_bias, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->conv3_4_running_mean, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv3_4_running_mean, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - 
NN_init_tensor(&model->conv3_4_running_var, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv3_4_running_var, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->conv3_4, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv3_4, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); // : conv3_5 - NN_init_tensor(&model->conv3_5, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv3_5, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); // : conv4_0 - NN_init_tensor(&model->conv4_0_weight, 4, (size_t[]){3, 3, 1, 120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv4_0_weight, 4, (size_t[]){3, 3, 1, 120}, DTYPE_F32, array_pointer); array_pointer += 1080; - NN_init_tensor(&model->conv4_0, 4, (size_t[]){1, 28, 28, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv4_0, 4, (size_t[]){1, 28, 28, 120}, DTYPE_F32, NULL); // : conv4_1 - NN_init_tensor(&model->conv4_1_weight, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv4_1_weight, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->conv4_1_bias, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv4_1_bias, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->conv4_1_running_mean, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv4_1_running_mean, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->conv4_1_running_var, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv4_1_running_var, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->conv4_1, 4, (size_t[]){1, 28, 28, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv4_1, 4, (size_t[]){1, 28, 28, 120}, DTYPE_F32, NULL); // : conv4_2 - NN_init_tensor(&model->conv4_2, 4, (size_t[]){1, 28, 28, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv4_2, 4, (size_t[]){1, 28, 28, 120}, DTYPE_F32, NULL); // : conv4_3 - NN_init_tensor(&model->conv4_3_weight, 4, (size_t[]){1, 1, 120, 144}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv4_3_weight, 4, (size_t[]){1, 1, 120, 144}, DTYPE_F32, array_pointer); array_pointer += 17280; - NN_init_tensor(&model->conv4_3, 4, (size_t[]){1, 28, 28, 144}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv4_3, 4, (size_t[]){1, 28, 28, 144}, DTYPE_F32, NULL); // : conv4_4 - NN_init_tensor(&model->conv4_4_weight, 1, (size_t[]){144}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv4_4_weight, 1, (size_t[]){144}, DTYPE_F32, array_pointer); array_pointer += 144; - NN_init_tensor(&model->conv4_4_bias, 1, (size_t[]){144}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv4_4_bias, 1, (size_t[]){144}, DTYPE_F32, array_pointer); array_pointer += 144; - NN_init_tensor(&model->conv4_4_running_mean, 1, (size_t[]){144}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv4_4_running_mean, 1, (size_t[]){144}, DTYPE_F32, array_pointer); array_pointer += 144; - NN_init_tensor(&model->conv4_4_running_var, 1, (size_t[]){144}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv4_4_running_var, 1, (size_t[]){144}, DTYPE_F32, array_pointer); array_pointer += 144; - NN_init_tensor(&model->conv4_4, 4, (size_t[]){1, 28, 28, 144}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv4_4, 4, (size_t[]){1, 28, 28, 144}, DTYPE_F32, NULL); // : conv4_5 - NN_init_tensor(&model->conv4_5, 4, 
(size_t[]){1, 28, 28, 144}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv4_5, 4, (size_t[]){1, 28, 28, 144}, DTYPE_F32, NULL); // : conv5_0 - NN_init_tensor(&model->conv5_0_weight, 4, (size_t[]){3, 3, 1, 144}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv5_0_weight, 4, (size_t[]){3, 3, 1, 144}, DTYPE_F32, array_pointer); array_pointer += 1296; - NN_init_tensor(&model->conv5_0, 4, (size_t[]){1, 28, 28, 144}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv5_0, 4, (size_t[]){1, 28, 28, 144}, DTYPE_F32, NULL); // : conv5_1 - NN_init_tensor(&model->conv5_1_weight, 1, (size_t[]){144}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv5_1_weight, 1, (size_t[]){144}, DTYPE_F32, array_pointer); array_pointer += 144; - NN_init_tensor(&model->conv5_1_bias, 1, (size_t[]){144}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv5_1_bias, 1, (size_t[]){144}, DTYPE_F32, array_pointer); array_pointer += 144; - NN_init_tensor(&model->conv5_1_running_mean, 1, (size_t[]){144}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv5_1_running_mean, 1, (size_t[]){144}, DTYPE_F32, array_pointer); array_pointer += 144; - NN_init_tensor(&model->conv5_1_running_var, 1, (size_t[]){144}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv5_1_running_var, 1, (size_t[]){144}, DTYPE_F32, array_pointer); array_pointer += 144; - NN_init_tensor(&model->conv5_1, 4, (size_t[]){1, 28, 28, 144}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv5_1, 4, (size_t[]){1, 28, 28, 144}, DTYPE_F32, NULL); // : conv5_2 - NN_init_tensor(&model->conv5_2, 4, (size_t[]){1, 28, 28, 144}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv5_2, 4, (size_t[]){1, 28, 28, 144}, DTYPE_F32, NULL); // : conv5_3 - NN_init_tensor(&model->conv5_3_weight, 4, (size_t[]){1, 1, 144, 256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv5_3_weight, 4, (size_t[]){1, 1, 144, 256}, DTYPE_F32, array_pointer); array_pointer += 36864; - NN_init_tensor(&model->conv5_3, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv5_3, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); // : conv5_4 - NN_init_tensor(&model->conv5_4_weight, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv5_4_weight, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->conv5_4_bias, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv5_4_bias, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->conv5_4_running_mean, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv5_4_running_mean, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->conv5_4_running_var, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv5_4_running_var, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->conv5_4, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv5_4, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); // : conv5_5 - NN_init_tensor(&model->conv5_5, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv5_5, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); // : conv6_0 - NN_init_tensor(&model->conv6_0_weight, 4, (size_t[]){3, 3, 1, 256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv6_0_weight, 4, (size_t[]){3, 3, 1, 256}, DTYPE_F32, array_pointer); array_pointer += 2304; - NN_init_tensor(&model->conv6_0, 4, (size_t[]){1, 14, 14, 
256}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv6_0, 4, (size_t[]){1, 14, 14, 256}, DTYPE_F32, NULL); // : conv6_1 - NN_init_tensor(&model->conv6_1_weight, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv6_1_weight, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->conv6_1_bias, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv6_1_bias, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->conv6_1_running_mean, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv6_1_running_mean, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->conv6_1_running_var, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv6_1_running_var, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->conv6_1, 4, (size_t[]){1, 14, 14, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv6_1, 4, (size_t[]){1, 14, 14, 256}, DTYPE_F32, NULL); // : conv6_2 - NN_init_tensor(&model->conv6_2, 4, (size_t[]){1, 14, 14, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv6_2, 4, (size_t[]){1, 14, 14, 256}, DTYPE_F32, NULL); // : conv6_3 - NN_init_tensor(&model->conv6_3_weight, 4, (size_t[]){1, 1, 256, 408}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv6_3_weight, 4, (size_t[]){1, 1, 256, 408}, DTYPE_F32, array_pointer); array_pointer += 104448; - NN_init_tensor(&model->conv6_3, 4, (size_t[]){1, 14, 14, 408}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv6_3, 4, (size_t[]){1, 14, 14, 408}, DTYPE_F32, NULL); // : conv6_4 - NN_init_tensor(&model->conv6_4_weight, 1, (size_t[]){408}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv6_4_weight, 1, (size_t[]){408}, DTYPE_F32, array_pointer); array_pointer += 408; - NN_init_tensor(&model->conv6_4_bias, 1, (size_t[]){408}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv6_4_bias, 1, (size_t[]){408}, DTYPE_F32, array_pointer); array_pointer += 408; - NN_init_tensor(&model->conv6_4_running_mean, 1, (size_t[]){408}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv6_4_running_mean, 1, (size_t[]){408}, DTYPE_F32, array_pointer); array_pointer += 408; - NN_init_tensor(&model->conv6_4_running_var, 1, (size_t[]){408}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv6_4_running_var, 1, (size_t[]){408}, DTYPE_F32, array_pointer); array_pointer += 408; - NN_init_tensor(&model->conv6_4, 4, (size_t[]){1, 14, 14, 408}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv6_4, 4, (size_t[]){1, 14, 14, 408}, DTYPE_F32, NULL); // : conv6_5 - NN_init_tensor(&model->conv6_5, 4, (size_t[]){1, 14, 14, 408}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv6_5, 4, (size_t[]){1, 14, 14, 408}, DTYPE_F32, NULL); // : conv7_0 - NN_init_tensor(&model->conv7_0_weight, 4, (size_t[]){3, 3, 1, 408}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv7_0_weight, 4, (size_t[]){3, 3, 1, 408}, DTYPE_F32, array_pointer); array_pointer += 3672; - NN_init_tensor(&model->conv7_0, 4, (size_t[]){1, 14, 14, 408}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv7_0, 4, (size_t[]){1, 14, 14, 408}, DTYPE_F32, NULL); // : conv7_1 - NN_init_tensor(&model->conv7_1_weight, 1, (size_t[]){408}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv7_1_weight, 1, (size_t[]){408}, DTYPE_F32, array_pointer); array_pointer += 408; - NN_init_tensor(&model->conv7_1_bias, 1, (size_t[]){408}, DTYPE_F32, array_pointer); + 
nn_init_tensor(&model->conv7_1_bias, 1, (size_t[]){408}, DTYPE_F32, array_pointer); array_pointer += 408; - NN_init_tensor(&model->conv7_1_running_mean, 1, (size_t[]){408}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv7_1_running_mean, 1, (size_t[]){408}, DTYPE_F32, array_pointer); array_pointer += 408; - NN_init_tensor(&model->conv7_1_running_var, 1, (size_t[]){408}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv7_1_running_var, 1, (size_t[]){408}, DTYPE_F32, array_pointer); array_pointer += 408; - NN_init_tensor(&model->conv7_1, 4, (size_t[]){1, 14, 14, 408}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv7_1, 4, (size_t[]){1, 14, 14, 408}, DTYPE_F32, NULL); // : conv7_2 - NN_init_tensor(&model->conv7_2, 4, (size_t[]){1, 14, 14, 408}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv7_2, 4, (size_t[]){1, 14, 14, 408}, DTYPE_F32, NULL); // : conv7_3 - NN_init_tensor(&model->conv7_3_weight, 4, (size_t[]){1, 1, 408, 376}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv7_3_weight, 4, (size_t[]){1, 1, 408, 376}, DTYPE_F32, array_pointer); array_pointer += 153408; - NN_init_tensor(&model->conv7_3, 4, (size_t[]){1, 14, 14, 376}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv7_3, 4, (size_t[]){1, 14, 14, 376}, DTYPE_F32, NULL); // : conv7_4 - NN_init_tensor(&model->conv7_4_weight, 1, (size_t[]){376}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv7_4_weight, 1, (size_t[]){376}, DTYPE_F32, array_pointer); array_pointer += 376; - NN_init_tensor(&model->conv7_4_bias, 1, (size_t[]){376}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv7_4_bias, 1, (size_t[]){376}, DTYPE_F32, array_pointer); array_pointer += 376; - NN_init_tensor(&model->conv7_4_running_mean, 1, (size_t[]){376}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv7_4_running_mean, 1, (size_t[]){376}, DTYPE_F32, array_pointer); array_pointer += 376; - NN_init_tensor(&model->conv7_4_running_var, 1, (size_t[]){376}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv7_4_running_var, 1, (size_t[]){376}, DTYPE_F32, array_pointer); array_pointer += 376; - NN_init_tensor(&model->conv7_4, 4, (size_t[]){1, 14, 14, 376}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv7_4, 4, (size_t[]){1, 14, 14, 376}, DTYPE_F32, NULL); // : conv7_5 - NN_init_tensor(&model->conv7_5, 4, (size_t[]){1, 14, 14, 376}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv7_5, 4, (size_t[]){1, 14, 14, 376}, DTYPE_F32, NULL); // : conv8_0 - NN_init_tensor(&model->conv8_0_weight, 4, (size_t[]){3, 3, 1, 376}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv8_0_weight, 4, (size_t[]){3, 3, 1, 376}, DTYPE_F32, array_pointer); array_pointer += 3384; - NN_init_tensor(&model->conv8_0, 4, (size_t[]){1, 14, 14, 376}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv8_0, 4, (size_t[]){1, 14, 14, 376}, DTYPE_F32, NULL); // : conv8_1 - NN_init_tensor(&model->conv8_1_weight, 1, (size_t[]){376}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv8_1_weight, 1, (size_t[]){376}, DTYPE_F32, array_pointer); array_pointer += 376; - NN_init_tensor(&model->conv8_1_bias, 1, (size_t[]){376}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv8_1_bias, 1, (size_t[]){376}, DTYPE_F32, array_pointer); array_pointer += 376; - NN_init_tensor(&model->conv8_1_running_mean, 1, (size_t[]){376}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv8_1_running_mean, 1, (size_t[]){376}, DTYPE_F32, array_pointer); array_pointer += 376; - NN_init_tensor(&model->conv8_1_running_var, 1, (size_t[]){376}, DTYPE_F32, array_pointer); + 
nn_init_tensor(&model->conv8_1_running_var, 1, (size_t[]){376}, DTYPE_F32, array_pointer); array_pointer += 376; - NN_init_tensor(&model->conv8_1, 4, (size_t[]){1, 14, 14, 376}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv8_1, 4, (size_t[]){1, 14, 14, 376}, DTYPE_F32, NULL); // : conv8_2 - NN_init_tensor(&model->conv8_2, 4, (size_t[]){1, 14, 14, 376}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv8_2, 4, (size_t[]){1, 14, 14, 376}, DTYPE_F32, NULL); // : conv8_3 - NN_init_tensor(&model->conv8_3_weight, 4, (size_t[]){1, 1, 376, 272}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv8_3_weight, 4, (size_t[]){1, 1, 376, 272}, DTYPE_F32, array_pointer); array_pointer += 102272; - NN_init_tensor(&model->conv8_3, 4, (size_t[]){1, 14, 14, 272}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv8_3, 4, (size_t[]){1, 14, 14, 272}, DTYPE_F32, NULL); // : conv8_4 - NN_init_tensor(&model->conv8_4_weight, 1, (size_t[]){272}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv8_4_weight, 1, (size_t[]){272}, DTYPE_F32, array_pointer); array_pointer += 272; - NN_init_tensor(&model->conv8_4_bias, 1, (size_t[]){272}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv8_4_bias, 1, (size_t[]){272}, DTYPE_F32, array_pointer); array_pointer += 272; - NN_init_tensor(&model->conv8_4_running_mean, 1, (size_t[]){272}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv8_4_running_mean, 1, (size_t[]){272}, DTYPE_F32, array_pointer); array_pointer += 272; - NN_init_tensor(&model->conv8_4_running_var, 1, (size_t[]){272}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv8_4_running_var, 1, (size_t[]){272}, DTYPE_F32, array_pointer); array_pointer += 272; - NN_init_tensor(&model->conv8_4, 4, (size_t[]){1, 14, 14, 272}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv8_4, 4, (size_t[]){1, 14, 14, 272}, DTYPE_F32, NULL); // : conv8_5 - NN_init_tensor(&model->conv8_5, 4, (size_t[]){1, 14, 14, 272}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv8_5, 4, (size_t[]){1, 14, 14, 272}, DTYPE_F32, NULL); // : conv9_0 - NN_init_tensor(&model->conv9_0_weight, 4, (size_t[]){3, 3, 1, 272}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv9_0_weight, 4, (size_t[]){3, 3, 1, 272}, DTYPE_F32, array_pointer); array_pointer += 2448; - NN_init_tensor(&model->conv9_0, 4, (size_t[]){1, 14, 14, 272}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv9_0, 4, (size_t[]){1, 14, 14, 272}, DTYPE_F32, NULL); // : conv9_1 - NN_init_tensor(&model->conv9_1_weight, 1, (size_t[]){272}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv9_1_weight, 1, (size_t[]){272}, DTYPE_F32, array_pointer); array_pointer += 272; - NN_init_tensor(&model->conv9_1_bias, 1, (size_t[]){272}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv9_1_bias, 1, (size_t[]){272}, DTYPE_F32, array_pointer); array_pointer += 272; - NN_init_tensor(&model->conv9_1_running_mean, 1, (size_t[]){272}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv9_1_running_mean, 1, (size_t[]){272}, DTYPE_F32, array_pointer); array_pointer += 272; - NN_init_tensor(&model->conv9_1_running_var, 1, (size_t[]){272}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv9_1_running_var, 1, (size_t[]){272}, DTYPE_F32, array_pointer); array_pointer += 272; - NN_init_tensor(&model->conv9_1, 4, (size_t[]){1, 14, 14, 272}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv9_1, 4, (size_t[]){1, 14, 14, 272}, DTYPE_F32, NULL); // : conv9_2 - NN_init_tensor(&model->conv9_2, 4, (size_t[]){1, 14, 14, 272}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv9_2, 4, 
(size_t[]){1, 14, 14, 272}, DTYPE_F32, NULL); // : conv9_3 - NN_init_tensor(&model->conv9_3_weight, 4, (size_t[]){1, 1, 272, 288}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv9_3_weight, 4, (size_t[]){1, 1, 272, 288}, DTYPE_F32, array_pointer); array_pointer += 78336; - NN_init_tensor(&model->conv9_3, 4, (size_t[]){1, 14, 14, 288}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv9_3, 4, (size_t[]){1, 14, 14, 288}, DTYPE_F32, NULL); // : conv9_4 - NN_init_tensor(&model->conv9_4_weight, 1, (size_t[]){288}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv9_4_weight, 1, (size_t[]){288}, DTYPE_F32, array_pointer); array_pointer += 288; - NN_init_tensor(&model->conv9_4_bias, 1, (size_t[]){288}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv9_4_bias, 1, (size_t[]){288}, DTYPE_F32, array_pointer); array_pointer += 288; - NN_init_tensor(&model->conv9_4_running_mean, 1, (size_t[]){288}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv9_4_running_mean, 1, (size_t[]){288}, DTYPE_F32, array_pointer); array_pointer += 288; - NN_init_tensor(&model->conv9_4_running_var, 1, (size_t[]){288}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv9_4_running_var, 1, (size_t[]){288}, DTYPE_F32, array_pointer); array_pointer += 288; - NN_init_tensor(&model->conv9_4, 4, (size_t[]){1, 14, 14, 288}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv9_4, 4, (size_t[]){1, 14, 14, 288}, DTYPE_F32, NULL); // : conv9_5 - NN_init_tensor(&model->conv9_5, 4, (size_t[]){1, 14, 14, 288}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv9_5, 4, (size_t[]){1, 14, 14, 288}, DTYPE_F32, NULL); // : conv10_0 - NN_init_tensor(&model->conv10_0_weight, 4, (size_t[]){3, 3, 1, 288}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv10_0_weight, 4, (size_t[]){3, 3, 1, 288}, DTYPE_F32, array_pointer); array_pointer += 2592; - NN_init_tensor(&model->conv10_0, 4, (size_t[]){1, 14, 14, 288}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv10_0, 4, (size_t[]){1, 14, 14, 288}, DTYPE_F32, NULL); // : conv10_1 - NN_init_tensor(&model->conv10_1_weight, 1, (size_t[]){288}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv10_1_weight, 1, (size_t[]){288}, DTYPE_F32, array_pointer); array_pointer += 288; - NN_init_tensor(&model->conv10_1_bias, 1, (size_t[]){288}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv10_1_bias, 1, (size_t[]){288}, DTYPE_F32, array_pointer); array_pointer += 288; - NN_init_tensor(&model->conv10_1_running_mean, 1, (size_t[]){288}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv10_1_running_mean, 1, (size_t[]){288}, DTYPE_F32, array_pointer); array_pointer += 288; - NN_init_tensor(&model->conv10_1_running_var, 1, (size_t[]){288}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv10_1_running_var, 1, (size_t[]){288}, DTYPE_F32, array_pointer); array_pointer += 288; - NN_init_tensor(&model->conv10_1, 4, (size_t[]){1, 14, 14, 288}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv10_1, 4, (size_t[]){1, 14, 14, 288}, DTYPE_F32, NULL); // : conv10_2 - NN_init_tensor(&model->conv10_2, 4, (size_t[]){1, 14, 14, 288}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv10_2, 4, (size_t[]){1, 14, 14, 288}, DTYPE_F32, NULL); // : conv10_3 - NN_init_tensor(&model->conv10_3_weight, 4, (size_t[]){1, 1, 288, 296}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv10_3_weight, 4, (size_t[]){1, 1, 288, 296}, DTYPE_F32, array_pointer); array_pointer += 85248; - NN_init_tensor(&model->conv10_3, 4, (size_t[]){1, 14, 14, 296}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv10_3, 
4, (size_t[]){1, 14, 14, 296}, DTYPE_F32, NULL); // : conv10_4 - NN_init_tensor(&model->conv10_4_weight, 1, (size_t[]){296}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv10_4_weight, 1, (size_t[]){296}, DTYPE_F32, array_pointer); array_pointer += 296; - NN_init_tensor(&model->conv10_4_bias, 1, (size_t[]){296}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv10_4_bias, 1, (size_t[]){296}, DTYPE_F32, array_pointer); array_pointer += 296; - NN_init_tensor(&model->conv10_4_running_mean, 1, (size_t[]){296}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv10_4_running_mean, 1, (size_t[]){296}, DTYPE_F32, array_pointer); array_pointer += 296; - NN_init_tensor(&model->conv10_4_running_var, 1, (size_t[]){296}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv10_4_running_var, 1, (size_t[]){296}, DTYPE_F32, array_pointer); array_pointer += 296; - NN_init_tensor(&model->conv10_4, 4, (size_t[]){1, 14, 14, 296}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv10_4, 4, (size_t[]){1, 14, 14, 296}, DTYPE_F32, NULL); // : conv10_5 - NN_init_tensor(&model->conv10_5, 4, (size_t[]){1, 14, 14, 296}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv10_5, 4, (size_t[]){1, 14, 14, 296}, DTYPE_F32, NULL); // : conv11_0 - NN_init_tensor(&model->conv11_0_weight, 4, (size_t[]){3, 3, 1, 296}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv11_0_weight, 4, (size_t[]){3, 3, 1, 296}, DTYPE_F32, array_pointer); array_pointer += 2664; - NN_init_tensor(&model->conv11_0, 4, (size_t[]){1, 14, 14, 296}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv11_0, 4, (size_t[]){1, 14, 14, 296}, DTYPE_F32, NULL); // : conv11_1 - NN_init_tensor(&model->conv11_1_weight, 1, (size_t[]){296}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv11_1_weight, 1, (size_t[]){296}, DTYPE_F32, array_pointer); array_pointer += 296; - NN_init_tensor(&model->conv11_1_bias, 1, (size_t[]){296}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv11_1_bias, 1, (size_t[]){296}, DTYPE_F32, array_pointer); array_pointer += 296; - NN_init_tensor(&model->conv11_1_running_mean, 1, (size_t[]){296}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv11_1_running_mean, 1, (size_t[]){296}, DTYPE_F32, array_pointer); array_pointer += 296; - NN_init_tensor(&model->conv11_1_running_var, 1, (size_t[]){296}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv11_1_running_var, 1, (size_t[]){296}, DTYPE_F32, array_pointer); array_pointer += 296; - NN_init_tensor(&model->conv11_1, 4, (size_t[]){1, 14, 14, 296}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv11_1, 4, (size_t[]){1, 14, 14, 296}, DTYPE_F32, NULL); // : conv11_2 - NN_init_tensor(&model->conv11_2, 4, (size_t[]){1, 14, 14, 296}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv11_2, 4, (size_t[]){1, 14, 14, 296}, DTYPE_F32, NULL); // : conv11_3 - NN_init_tensor(&model->conv11_3_weight, 4, (size_t[]){1, 1, 296, 328}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv11_3_weight, 4, (size_t[]){1, 1, 296, 328}, DTYPE_F32, array_pointer); array_pointer += 97088; - NN_init_tensor(&model->conv11_3, 4, (size_t[]){1, 14, 14, 328}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv11_3, 4, (size_t[]){1, 14, 14, 328}, DTYPE_F32, NULL); // : conv11_4 - NN_init_tensor(&model->conv11_4_weight, 1, (size_t[]){328}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv11_4_weight, 1, (size_t[]){328}, DTYPE_F32, array_pointer); array_pointer += 328; - NN_init_tensor(&model->conv11_4_bias, 1, (size_t[]){328}, DTYPE_F32, array_pointer); + 
nn_init_tensor(&model->conv11_4_bias, 1, (size_t[]){328}, DTYPE_F32, array_pointer); array_pointer += 328; - NN_init_tensor(&model->conv11_4_running_mean, 1, (size_t[]){328}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv11_4_running_mean, 1, (size_t[]){328}, DTYPE_F32, array_pointer); array_pointer += 328; - NN_init_tensor(&model->conv11_4_running_var, 1, (size_t[]){328}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv11_4_running_var, 1, (size_t[]){328}, DTYPE_F32, array_pointer); array_pointer += 328; - NN_init_tensor(&model->conv11_4, 4, (size_t[]){1, 14, 14, 328}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv11_4, 4, (size_t[]){1, 14, 14, 328}, DTYPE_F32, NULL); // : conv11_5 - NN_init_tensor(&model->conv11_5, 4, (size_t[]){1, 14, 14, 328}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv11_5, 4, (size_t[]){1, 14, 14, 328}, DTYPE_F32, NULL); // : conv12_0 - NN_init_tensor(&model->conv12_0_weight, 4, (size_t[]){3, 3, 1, 328}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv12_0_weight, 4, (size_t[]){3, 3, 1, 328}, DTYPE_F32, array_pointer); array_pointer += 2952; - NN_init_tensor(&model->conv12_0, 4, (size_t[]){1, 7, 7, 328}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv12_0, 4, (size_t[]){1, 7, 7, 328}, DTYPE_F32, NULL); // : conv12_1 - NN_init_tensor(&model->conv12_1_weight, 1, (size_t[]){328}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv12_1_weight, 1, (size_t[]){328}, DTYPE_F32, array_pointer); array_pointer += 328; - NN_init_tensor(&model->conv12_1_bias, 1, (size_t[]){328}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv12_1_bias, 1, (size_t[]){328}, DTYPE_F32, array_pointer); array_pointer += 328; - NN_init_tensor(&model->conv12_1_running_mean, 1, (size_t[]){328}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv12_1_running_mean, 1, (size_t[]){328}, DTYPE_F32, array_pointer); array_pointer += 328; - NN_init_tensor(&model->conv12_1_running_var, 1, (size_t[]){328}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv12_1_running_var, 1, (size_t[]){328}, DTYPE_F32, array_pointer); array_pointer += 328; - NN_init_tensor(&model->conv12_1, 4, (size_t[]){1, 7, 7, 328}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv12_1, 4, (size_t[]){1, 7, 7, 328}, DTYPE_F32, NULL); // : conv12_2 - NN_init_tensor(&model->conv12_2, 4, (size_t[]){1, 7, 7, 328}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv12_2, 4, (size_t[]){1, 7, 7, 328}, DTYPE_F32, NULL); // : conv12_3 - NN_init_tensor(&model->conv12_3_weight, 4, (size_t[]){1, 1, 328, 480}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv12_3_weight, 4, (size_t[]){1, 1, 328, 480}, DTYPE_F32, array_pointer); array_pointer += 157440; - NN_init_tensor(&model->conv12_3, 4, (size_t[]){1, 7, 7, 480}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv12_3, 4, (size_t[]){1, 7, 7, 480}, DTYPE_F32, NULL); // : conv12_4 - NN_init_tensor(&model->conv12_4_weight, 1, (size_t[]){480}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv12_4_weight, 1, (size_t[]){480}, DTYPE_F32, array_pointer); array_pointer += 480; - NN_init_tensor(&model->conv12_4_bias, 1, (size_t[]){480}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv12_4_bias, 1, (size_t[]){480}, DTYPE_F32, array_pointer); array_pointer += 480; - NN_init_tensor(&model->conv12_4_running_mean, 1, (size_t[]){480}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv12_4_running_mean, 1, (size_t[]){480}, DTYPE_F32, array_pointer); array_pointer += 480; - NN_init_tensor(&model->conv12_4_running_var, 1, (size_t[]){480}, DTYPE_F32, 
array_pointer); + nn_init_tensor(&model->conv12_4_running_var, 1, (size_t[]){480}, DTYPE_F32, array_pointer); array_pointer += 480; - NN_init_tensor(&model->conv12_4, 4, (size_t[]){1, 7, 7, 480}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv12_4, 4, (size_t[]){1, 7, 7, 480}, DTYPE_F32, NULL); // : conv12_5 - NN_init_tensor(&model->conv12_5, 4, (size_t[]){1, 7, 7, 480}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv12_5, 4, (size_t[]){1, 7, 7, 480}, DTYPE_F32, NULL); // : conv13_0 - NN_init_tensor(&model->conv13_0_weight, 4, (size_t[]){3, 3, 1, 480}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv13_0_weight, 4, (size_t[]){3, 3, 1, 480}, DTYPE_F32, array_pointer); array_pointer += 4320; - NN_init_tensor(&model->conv13_0, 4, (size_t[]){1, 7, 7, 480}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv13_0, 4, (size_t[]){1, 7, 7, 480}, DTYPE_F32, NULL); // : conv13_1 - NN_init_tensor(&model->conv13_1_weight, 1, (size_t[]){480}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv13_1_weight, 1, (size_t[]){480}, DTYPE_F32, array_pointer); array_pointer += 480; - NN_init_tensor(&model->conv13_1_bias, 1, (size_t[]){480}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv13_1_bias, 1, (size_t[]){480}, DTYPE_F32, array_pointer); array_pointer += 480; - NN_init_tensor(&model->conv13_1_running_mean, 1, (size_t[]){480}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv13_1_running_mean, 1, (size_t[]){480}, DTYPE_F32, array_pointer); array_pointer += 480; - NN_init_tensor(&model->conv13_1_running_var, 1, (size_t[]){480}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv13_1_running_var, 1, (size_t[]){480}, DTYPE_F32, array_pointer); array_pointer += 480; - NN_init_tensor(&model->conv13_1, 4, (size_t[]){1, 7, 7, 480}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv13_1, 4, (size_t[]){1, 7, 7, 480}, DTYPE_F32, NULL); // : conv13_2 - NN_init_tensor(&model->conv13_2, 4, (size_t[]){1, 7, 7, 480}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv13_2, 4, (size_t[]){1, 7, 7, 480}, DTYPE_F32, NULL); // : conv13_3 - NN_init_tensor(&model->conv13_3_weight, 4, (size_t[]){1, 1, 480, 512}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv13_3_weight, 4, (size_t[]){1, 1, 480, 512}, DTYPE_F32, array_pointer); array_pointer += 245760; - NN_init_tensor(&model->conv13_3, 4, (size_t[]){1, 7, 7, 512}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv13_3, 4, (size_t[]){1, 7, 7, 512}, DTYPE_F32, NULL); // : conv13_4 - NN_init_tensor(&model->conv13_4_weight, 1, (size_t[]){512}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv13_4_weight, 1, (size_t[]){512}, DTYPE_F32, array_pointer); array_pointer += 512; - NN_init_tensor(&model->conv13_4_bias, 1, (size_t[]){512}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv13_4_bias, 1, (size_t[]){512}, DTYPE_F32, array_pointer); array_pointer += 512; - NN_init_tensor(&model->conv13_4_running_mean, 1, (size_t[]){512}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv13_4_running_mean, 1, (size_t[]){512}, DTYPE_F32, array_pointer); array_pointer += 512; - NN_init_tensor(&model->conv13_4_running_var, 1, (size_t[]){512}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->conv13_4_running_var, 1, (size_t[]){512}, DTYPE_F32, array_pointer); array_pointer += 512; - NN_init_tensor(&model->conv13_4, 4, (size_t[]){1, 7, 7, 512}, DTYPE_F32, NULL); + nn_init_tensor(&model->conv13_4, 4, (size_t[]){1, 7, 7, 512}, DTYPE_F32, NULL); // : conv13_5 - NN_init_tensor(&model->conv13_5, 4, (size_t[]){1, 7, 7, 512}, DTYPE_F32, NULL); + 
nn_init_tensor(&model->conv13_5, 4, (size_t[]){1, 7, 7, 512}, DTYPE_F32, NULL); // : decode_conv1_0_0 - NN_init_tensor(&model->decode_conv1_0_0_weight, 4, (size_t[]){5, 5, 1, 512}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv1_0_0_weight, 4, (size_t[]){5, 5, 1, 512}, DTYPE_F32, array_pointer); array_pointer += 12800; - NN_init_tensor(&model->decode_conv1_0_0, 4, (size_t[]){1, 7, 7, 512}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv1_0_0, 4, (size_t[]){1, 7, 7, 512}, DTYPE_F32, NULL); // : decode_conv1_0_1 - NN_init_tensor(&model->decode_conv1_0_1_weight, 1, (size_t[]){512}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv1_0_1_weight, 1, (size_t[]){512}, DTYPE_F32, array_pointer); array_pointer += 512; - NN_init_tensor(&model->decode_conv1_0_1_bias, 1, (size_t[]){512}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv1_0_1_bias, 1, (size_t[]){512}, DTYPE_F32, array_pointer); array_pointer += 512; - NN_init_tensor(&model->decode_conv1_0_1_running_mean, 1, (size_t[]){512}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv1_0_1_running_mean, 1, (size_t[]){512}, DTYPE_F32, array_pointer); array_pointer += 512; - NN_init_tensor(&model->decode_conv1_0_1_running_var, 1, (size_t[]){512}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv1_0_1_running_var, 1, (size_t[]){512}, DTYPE_F32, array_pointer); array_pointer += 512; - NN_init_tensor(&model->decode_conv1_0_1, 4, (size_t[]){1, 7, 7, 512}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv1_0_1, 4, (size_t[]){1, 7, 7, 512}, DTYPE_F32, NULL); // : decode_conv1_0_2 - NN_init_tensor(&model->decode_conv1_0_2, 4, (size_t[]){1, 7, 7, 512}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv1_0_2, 4, (size_t[]){1, 7, 7, 512}, DTYPE_F32, NULL); // : decode_conv1_1_0 - NN_init_tensor(&model->decode_conv1_1_0_weight, 4, (size_t[]){1, 1, 512, 200}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv1_1_0_weight, 4, (size_t[]){1, 1, 512, 200}, DTYPE_F32, array_pointer); array_pointer += 102400; - NN_init_tensor(&model->decode_conv1_1_0, 4, (size_t[]){1, 7, 7, 200}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv1_1_0, 4, (size_t[]){1, 7, 7, 200}, DTYPE_F32, NULL); // : decode_conv1_1_1 - NN_init_tensor(&model->decode_conv1_1_1_weight, 1, (size_t[]){200}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv1_1_1_weight, 1, (size_t[]){200}, DTYPE_F32, array_pointer); array_pointer += 200; - NN_init_tensor(&model->decode_conv1_1_1_bias, 1, (size_t[]){200}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv1_1_1_bias, 1, (size_t[]){200}, DTYPE_F32, array_pointer); array_pointer += 200; - NN_init_tensor(&model->decode_conv1_1_1_running_mean, 1, (size_t[]){200}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv1_1_1_running_mean, 1, (size_t[]){200}, DTYPE_F32, array_pointer); array_pointer += 200; - NN_init_tensor(&model->decode_conv1_1_1_running_var, 1, (size_t[]){200}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv1_1_1_running_var, 1, (size_t[]){200}, DTYPE_F32, array_pointer); array_pointer += 200; - NN_init_tensor(&model->decode_conv1_1_1, 4, (size_t[]){1, 7, 7, 200}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv1_1_1, 4, (size_t[]){1, 7, 7, 200}, DTYPE_F32, NULL); // : decode_conv1_1_2 - NN_init_tensor(&model->decode_conv1_1_2, 4, (size_t[]){1, 7, 7, 200}, DTYPE_F32, NULL); - NN_init_tensor(&model->interpolate, 4, (size_t[]){1, 14, 14, 200}, DTYPE_F32, NULL); + 
nn_init_tensor(&model->decode_conv1_1_2, 4, (size_t[]){1, 7, 7, 200}, DTYPE_F32, NULL); + nn_init_tensor(&model->interpolate, 4, (size_t[]){1, 14, 14, 200}, DTYPE_F32, NULL); // : decode_conv2_0_0 - NN_init_tensor(&model->decode_conv2_0_0_weight, 4, (size_t[]){5, 5, 1, 200}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv2_0_0_weight, 4, (size_t[]){5, 5, 1, 200}, DTYPE_F32, array_pointer); array_pointer += 5000; - NN_init_tensor(&model->decode_conv2_0_0, 4, (size_t[]){1, 14, 14, 200}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv2_0_0, 4, (size_t[]){1, 14, 14, 200}, DTYPE_F32, NULL); // : decode_conv2_0_1 - NN_init_tensor(&model->decode_conv2_0_1_weight, 1, (size_t[]){200}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv2_0_1_weight, 1, (size_t[]){200}, DTYPE_F32, array_pointer); array_pointer += 200; - NN_init_tensor(&model->decode_conv2_0_1_bias, 1, (size_t[]){200}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv2_0_1_bias, 1, (size_t[]){200}, DTYPE_F32, array_pointer); array_pointer += 200; - NN_init_tensor(&model->decode_conv2_0_1_running_mean, 1, (size_t[]){200}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv2_0_1_running_mean, 1, (size_t[]){200}, DTYPE_F32, array_pointer); array_pointer += 200; - NN_init_tensor(&model->decode_conv2_0_1_running_var, 1, (size_t[]){200}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv2_0_1_running_var, 1, (size_t[]){200}, DTYPE_F32, array_pointer); array_pointer += 200; - NN_init_tensor(&model->decode_conv2_0_1, 4, (size_t[]){1, 14, 14, 200}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv2_0_1, 4, (size_t[]){1, 14, 14, 200}, DTYPE_F32, NULL); // : decode_conv2_0_2 - NN_init_tensor(&model->decode_conv2_0_2, 4, (size_t[]){1, 14, 14, 200}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv2_0_2, 4, (size_t[]){1, 14, 14, 200}, DTYPE_F32, NULL); // : decode_conv2_1_0 - NN_init_tensor(&model->decode_conv2_1_0_weight, 4, (size_t[]){1, 1, 200, 256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv2_1_0_weight, 4, (size_t[]){1, 1, 200, 256}, DTYPE_F32, array_pointer); array_pointer += 51200; - NN_init_tensor(&model->decode_conv2_1_0, 4, (size_t[]){1, 14, 14, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv2_1_0, 4, (size_t[]){1, 14, 14, 256}, DTYPE_F32, NULL); // : decode_conv2_1_1 - NN_init_tensor(&model->decode_conv2_1_1_weight, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv2_1_1_weight, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->decode_conv2_1_1_bias, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv2_1_1_bias, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->decode_conv2_1_1_running_mean, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv2_1_1_running_mean, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->decode_conv2_1_1_running_var, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv2_1_1_running_var, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->decode_conv2_1_1, 4, (size_t[]){1, 14, 14, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv2_1_1, 4, (size_t[]){1, 14, 14, 256}, DTYPE_F32, NULL); // : decode_conv2_1_2 - NN_init_tensor(&model->decode_conv2_1_2, 4, (size_t[]){1, 14, 14, 256}, DTYPE_F32, 
NULL); - NN_init_tensor(&model->interpolate_1, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); - NN_init_tensor(&model->add, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv2_1_2, 4, (size_t[]){1, 14, 14, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->interpolate_1, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->add, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); // : decode_conv3_0_0 - NN_init_tensor(&model->decode_conv3_0_0_weight, 4, (size_t[]){5, 5, 1, 256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv3_0_0_weight, 4, (size_t[]){5, 5, 1, 256}, DTYPE_F32, array_pointer); array_pointer += 6400; - NN_init_tensor(&model->decode_conv3_0_0, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv3_0_0, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); // : decode_conv3_0_1 - NN_init_tensor(&model->decode_conv3_0_1_weight, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv3_0_1_weight, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->decode_conv3_0_1_bias, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv3_0_1_bias, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->decode_conv3_0_1_running_mean, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv3_0_1_running_mean, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->decode_conv3_0_1_running_var, 1, (size_t[]){256}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv3_0_1_running_var, 1, (size_t[]){256}, DTYPE_F32, array_pointer); array_pointer += 256; - NN_init_tensor(&model->decode_conv3_0_1, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv3_0_1, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); // : decode_conv3_0_2 - NN_init_tensor(&model->decode_conv3_0_2, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv3_0_2, 4, (size_t[]){1, 28, 28, 256}, DTYPE_F32, NULL); // : decode_conv3_1_0 - NN_init_tensor(&model->decode_conv3_1_0_weight, 4, (size_t[]){1, 1, 256, 120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv3_1_0_weight, 4, (size_t[]){1, 1, 256, 120}, DTYPE_F32, array_pointer); array_pointer += 30720; - NN_init_tensor(&model->decode_conv3_1_0, 4, (size_t[]){1, 28, 28, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv3_1_0, 4, (size_t[]){1, 28, 28, 120}, DTYPE_F32, NULL); // : decode_conv3_1_1 - NN_init_tensor(&model->decode_conv3_1_1_weight, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv3_1_1_weight, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->decode_conv3_1_1_bias, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv3_1_1_bias, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->decode_conv3_1_1_running_mean, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv3_1_1_running_mean, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->decode_conv3_1_1_running_var, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv3_1_1_running_var, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - 
NN_init_tensor(&model->decode_conv3_1_1, 4, (size_t[]){1, 28, 28, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv3_1_1, 4, (size_t[]){1, 28, 28, 120}, DTYPE_F32, NULL); // : decode_conv3_1_2 - NN_init_tensor(&model->decode_conv3_1_2, 4, (size_t[]){1, 28, 28, 120}, DTYPE_F32, NULL); - NN_init_tensor(&model->interpolate_2, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); - NN_init_tensor(&model->add_1, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv3_1_2, 4, (size_t[]){1, 28, 28, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->interpolate_2, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->add_1, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); // : decode_conv4_0_0 - NN_init_tensor(&model->decode_conv4_0_0_weight, 4, (size_t[]){5, 5, 1, 120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv4_0_0_weight, 4, (size_t[]){5, 5, 1, 120}, DTYPE_F32, array_pointer); array_pointer += 3000; - NN_init_tensor(&model->decode_conv4_0_0, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv4_0_0, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); // : decode_conv4_0_1 - NN_init_tensor(&model->decode_conv4_0_1_weight, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv4_0_1_weight, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->decode_conv4_0_1_bias, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv4_0_1_bias, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->decode_conv4_0_1_running_mean, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv4_0_1_running_mean, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->decode_conv4_0_1_running_var, 1, (size_t[]){120}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv4_0_1_running_var, 1, (size_t[]){120}, DTYPE_F32, array_pointer); array_pointer += 120; - NN_init_tensor(&model->decode_conv4_0_1, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv4_0_1, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); // : decode_conv4_0_2 - NN_init_tensor(&model->decode_conv4_0_2, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv4_0_2, 4, (size_t[]){1, 56, 56, 120}, DTYPE_F32, NULL); // : decode_conv4_1_0 - NN_init_tensor(&model->decode_conv4_1_0_weight, 4, (size_t[]){1, 1, 120, 56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv4_1_0_weight, 4, (size_t[]){1, 1, 120, 56}, DTYPE_F32, array_pointer); array_pointer += 6720; - NN_init_tensor(&model->decode_conv4_1_0, 4, (size_t[]){1, 56, 56, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv4_1_0, 4, (size_t[]){1, 56, 56, 56}, DTYPE_F32, NULL); // : decode_conv4_1_1 - NN_init_tensor(&model->decode_conv4_1_1_weight, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv4_1_1_weight, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->decode_conv4_1_1_bias, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv4_1_1_bias, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->decode_conv4_1_1_running_mean, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv4_1_1_running_mean, 1, (size_t[]){56}, DTYPE_F32, array_pointer); 
array_pointer += 56; - NN_init_tensor(&model->decode_conv4_1_1_running_var, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv4_1_1_running_var, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->decode_conv4_1_1, 4, (size_t[]){1, 56, 56, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv4_1_1, 4, (size_t[]){1, 56, 56, 56}, DTYPE_F32, NULL); // : decode_conv4_1_2 - NN_init_tensor(&model->decode_conv4_1_2, 4, (size_t[]){1, 56, 56, 56}, DTYPE_F32, NULL); - NN_init_tensor(&model->interpolate_3, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); - NN_init_tensor(&model->add_2, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv4_1_2, 4, (size_t[]){1, 56, 56, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->interpolate_3, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->add_2, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); // : decode_conv5_0_0 - NN_init_tensor(&model->decode_conv5_0_0_weight, 4, (size_t[]){5, 5, 1, 56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv5_0_0_weight, 4, (size_t[]){5, 5, 1, 56}, DTYPE_F32, array_pointer); array_pointer += 1400; - NN_init_tensor(&model->decode_conv5_0_0, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv5_0_0, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); // : decode_conv5_0_1 - NN_init_tensor(&model->decode_conv5_0_1_weight, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv5_0_1_weight, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->decode_conv5_0_1_bias, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv5_0_1_bias, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->decode_conv5_0_1_running_mean, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv5_0_1_running_mean, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->decode_conv5_0_1_running_var, 1, (size_t[]){56}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv5_0_1_running_var, 1, (size_t[]){56}, DTYPE_F32, array_pointer); array_pointer += 56; - NN_init_tensor(&model->decode_conv5_0_1, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv5_0_1, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); // : decode_conv5_0_2 - NN_init_tensor(&model->decode_conv5_0_2, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv5_0_2, 4, (size_t[]){1, 112, 112, 56}, DTYPE_F32, NULL); // : decode_conv5_1_0 - NN_init_tensor(&model->decode_conv5_1_0_weight, 4, (size_t[]){1, 1, 56, 16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv5_1_0_weight, 4, (size_t[]){1, 1, 56, 16}, DTYPE_F32, array_pointer); array_pointer += 896; - NN_init_tensor(&model->decode_conv5_1_0, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv5_1_0, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); // : decode_conv5_1_1 - NN_init_tensor(&model->decode_conv5_1_1_weight, 1, (size_t[]){16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv5_1_1_weight, 1, (size_t[]){16}, DTYPE_F32, array_pointer); array_pointer += 16; - NN_init_tensor(&model->decode_conv5_1_1_bias, 1, (size_t[]){16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv5_1_1_bias, 1, (size_t[]){16}, DTYPE_F32, 
array_pointer); array_pointer += 16; - NN_init_tensor(&model->decode_conv5_1_1_running_mean, 1, (size_t[]){16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv5_1_1_running_mean, 1, (size_t[]){16}, DTYPE_F32, array_pointer); array_pointer += 16; - NN_init_tensor(&model->decode_conv5_1_1_running_var, 1, (size_t[]){16}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv5_1_1_running_var, 1, (size_t[]){16}, DTYPE_F32, array_pointer); array_pointer += 16; - NN_init_tensor(&model->decode_conv5_1_1, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv5_1_1, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); // : decode_conv5_1_2 - NN_init_tensor(&model->decode_conv5_1_2, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); - NN_init_tensor(&model->interpolate_4, 4, (size_t[]){1, 224, 224, 16}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv5_1_2, 4, (size_t[]){1, 112, 112, 16}, DTYPE_F32, NULL); + nn_init_tensor(&model->interpolate_4, 4, (size_t[]){1, 224, 224, 16}, DTYPE_F32, NULL); // : decode_conv6_0 - NN_init_tensor(&model->decode_conv6_0_weight, 4, (size_t[]){1, 1, 16, 1}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv6_0_weight, 4, (size_t[]){1, 1, 16, 1}, DTYPE_F32, array_pointer); array_pointer += 16; - NN_init_tensor(&model->decode_conv6_0, 4, (size_t[]){1, 224, 224, 1}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv6_0, 4, (size_t[]){1, 224, 224, 1}, DTYPE_F32, NULL); // : decode_conv6_1 - NN_init_tensor(&model->decode_conv6_1_weight, 1, (size_t[]){1}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv6_1_weight, 1, (size_t[]){1}, DTYPE_F32, array_pointer); array_pointer += 1; - NN_init_tensor(&model->decode_conv6_1_bias, 1, (size_t[]){1}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv6_1_bias, 1, (size_t[]){1}, DTYPE_F32, array_pointer); array_pointer += 1; - NN_init_tensor(&model->decode_conv6_1_running_mean, 1, (size_t[]){1}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv6_1_running_mean, 1, (size_t[]){1}, DTYPE_F32, array_pointer); array_pointer += 1; - NN_init_tensor(&model->decode_conv6_1_running_var, 1, (size_t[]){1}, DTYPE_F32, array_pointer); + nn_init_tensor(&model->decode_conv6_1_running_var, 1, (size_t[]){1}, DTYPE_F32, array_pointer); array_pointer += 1; - NN_init_tensor(&model->decode_conv6_1, 4, (size_t[]){1, 224, 224, 1}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv6_1, 4, (size_t[]){1, 224, 224, 1}, DTYPE_F32, NULL); // : decode_conv6_2 - NN_init_tensor(&model->decode_conv6_2, 4, (size_t[]){1, 224, 224, 1}, DTYPE_F32, NULL); + nn_init_tensor(&model->decode_conv6_2, 4, (size_t[]){1, 224, 224, 1}, DTYPE_F32, NULL); } @@ -1077,334 +1077,334 @@ void init(Model *model) { * Forward pass of the model */ void forward(Model *model) { - NN_conv2d( + nn_conv2d( &model->conv0_0, &model->x, &model->conv0_0_weight, NULL, (size_t[]){2, 2}, (size_t[]){1, 1}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv0_1, &model->conv0_0, &model->conv0_1_weight, &model->conv0_1_bias, 1e-05, &model->conv0_1_running_mean, &model->conv0_1_running_var); - NN_relu6(&model->conv0_2, &model->conv0_1); - NN_conv2d( + nn_relu6(&model->conv0_2, &model->conv0_1); + nn_conv2d( &model->conv1_0, &model->conv0_2, &model->conv1_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){1, 1}, (size_t[]){1, 1}, 16); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv1_1, &model->conv1_0, &model->conv1_1_weight, &model->conv1_1_bias, 1e-05, 
&model->conv1_1_running_mean, &model->conv1_1_running_var); - NN_relu6(&model->conv1_2, &model->conv1_1); - NN_conv2d( + nn_relu6(&model->conv1_2, &model->conv1_1); + nn_conv2d( &model->conv1_3, &model->conv1_2, &model->conv1_3_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv1_4, &model->conv1_3, &model->conv1_4_weight, &model->conv1_4_bias, 1e-05, &model->conv1_4_running_mean, &model->conv1_4_running_var); - NN_relu6(&model->conv1_5, &model->conv1_4); - NN_conv2d( + nn_relu6(&model->conv1_5, &model->conv1_4); + nn_conv2d( &model->conv2_0, &model->conv1_5, &model->conv2_0_weight, NULL, (size_t[]){2, 2}, (size_t[]){1, 1}, (size_t[]){1, 1}, 56); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv2_1, &model->conv2_0, &model->conv2_1_weight, &model->conv2_1_bias, 1e-05, &model->conv2_1_running_mean, &model->conv2_1_running_var); - NN_relu6(&model->conv2_2, &model->conv2_1); - NN_conv2d( + nn_relu6(&model->conv2_2, &model->conv2_1); + nn_conv2d( &model->conv2_3, &model->conv2_2, &model->conv2_3_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv2_4, &model->conv2_3, &model->conv2_4_weight, &model->conv2_4_bias, 1e-05, &model->conv2_4_running_mean, &model->conv2_4_running_var); - NN_relu6(&model->conv2_5, &model->conv2_4); - NN_conv2d( + nn_relu6(&model->conv2_5, &model->conv2_4); + nn_conv2d( &model->conv3_0, &model->conv2_5, &model->conv3_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){1, 1}, (size_t[]){1, 1}, 88); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv3_1, &model->conv3_0, &model->conv3_1_weight, &model->conv3_1_bias, 1e-05, &model->conv3_1_running_mean, &model->conv3_1_running_var); - NN_relu6(&model->conv3_2, &model->conv3_1); - NN_conv2d( + nn_relu6(&model->conv3_2, &model->conv3_1); + nn_conv2d( &model->conv3_3, &model->conv3_2, &model->conv3_3_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv3_4, &model->conv3_3, &model->conv3_4_weight, &model->conv3_4_bias, 1e-05, &model->conv3_4_running_mean, &model->conv3_4_running_var); - NN_relu6(&model->conv3_5, &model->conv3_4); - NN_conv2d( + nn_relu6(&model->conv3_5, &model->conv3_4); + nn_conv2d( &model->conv4_0, &model->conv3_5, &model->conv4_0_weight, NULL, (size_t[]){2, 2}, (size_t[]){1, 1}, (size_t[]){1, 1}, 120); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv4_1, &model->conv4_0, &model->conv4_1_weight, &model->conv4_1_bias, 1e-05, &model->conv4_1_running_mean, &model->conv4_1_running_var); - NN_relu6(&model->conv4_2, &model->conv4_1); - NN_conv2d( + nn_relu6(&model->conv4_2, &model->conv4_1); + nn_conv2d( &model->conv4_3, &model->conv4_2, &model->conv4_3_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv4_4, &model->conv4_3, &model->conv4_4_weight, &model->conv4_4_bias, 1e-05, &model->conv4_4_running_mean, &model->conv4_4_running_var); - NN_relu6(&model->conv4_5, &model->conv4_4); - NN_conv2d( + nn_relu6(&model->conv4_5, &model->conv4_4); + nn_conv2d( &model->conv5_0, &model->conv4_5, &model->conv5_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){1, 1}, (size_t[]){1, 1}, 144); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv5_1, &model->conv5_0, &model->conv5_1_weight, &model->conv5_1_bias, 1e-05, &model->conv5_1_running_mean, &model->conv5_1_running_var); - NN_relu6(&model->conv5_2, &model->conv5_1); - NN_conv2d( + nn_relu6(&model->conv5_2, 
&model->conv5_1); + nn_conv2d( &model->conv5_3, &model->conv5_2, &model->conv5_3_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv5_4, &model->conv5_3, &model->conv5_4_weight, &model->conv5_4_bias, 1e-05, &model->conv5_4_running_mean, &model->conv5_4_running_var); - NN_relu6(&model->conv5_5, &model->conv5_4); - NN_conv2d( + nn_relu6(&model->conv5_5, &model->conv5_4); + nn_conv2d( &model->conv6_0, &model->conv5_5, &model->conv6_0_weight, NULL, (size_t[]){2, 2}, (size_t[]){1, 1}, (size_t[]){1, 1}, 256); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv6_1, &model->conv6_0, &model->conv6_1_weight, &model->conv6_1_bias, 1e-05, &model->conv6_1_running_mean, &model->conv6_1_running_var); - NN_relu6(&model->conv6_2, &model->conv6_1); - NN_conv2d( + nn_relu6(&model->conv6_2, &model->conv6_1); + nn_conv2d( &model->conv6_3, &model->conv6_2, &model->conv6_3_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv6_4, &model->conv6_3, &model->conv6_4_weight, &model->conv6_4_bias, 1e-05, &model->conv6_4_running_mean, &model->conv6_4_running_var); - NN_relu6(&model->conv6_5, &model->conv6_4); - NN_conv2d( + nn_relu6(&model->conv6_5, &model->conv6_4); + nn_conv2d( &model->conv7_0, &model->conv6_5, &model->conv7_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){1, 1}, (size_t[]){1, 1}, 408); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv7_1, &model->conv7_0, &model->conv7_1_weight, &model->conv7_1_bias, 1e-05, &model->conv7_1_running_mean, &model->conv7_1_running_var); - NN_relu6(&model->conv7_2, &model->conv7_1); - NN_conv2d( + nn_relu6(&model->conv7_2, &model->conv7_1); + nn_conv2d( &model->conv7_3, &model->conv7_2, &model->conv7_3_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv7_4, &model->conv7_3, &model->conv7_4_weight, &model->conv7_4_bias, 1e-05, &model->conv7_4_running_mean, &model->conv7_4_running_var); - NN_relu6(&model->conv7_5, &model->conv7_4); - NN_conv2d( + nn_relu6(&model->conv7_5, &model->conv7_4); + nn_conv2d( &model->conv8_0, &model->conv7_5, &model->conv8_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){1, 1}, (size_t[]){1, 1}, 376); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv8_1, &model->conv8_0, &model->conv8_1_weight, &model->conv8_1_bias, 1e-05, &model->conv8_1_running_mean, &model->conv8_1_running_var); - NN_relu6(&model->conv8_2, &model->conv8_1); - NN_conv2d( + nn_relu6(&model->conv8_2, &model->conv8_1); + nn_conv2d( &model->conv8_3, &model->conv8_2, &model->conv8_3_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv8_4, &model->conv8_3, &model->conv8_4_weight, &model->conv8_4_bias, 1e-05, &model->conv8_4_running_mean, &model->conv8_4_running_var); - NN_relu6(&model->conv8_5, &model->conv8_4); - NN_conv2d( + nn_relu6(&model->conv8_5, &model->conv8_4); + nn_conv2d( &model->conv9_0, &model->conv8_5, &model->conv9_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){1, 1}, (size_t[]){1, 1}, 272); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv9_1, &model->conv9_0, &model->conv9_1_weight, &model->conv9_1_bias, 1e-05, &model->conv9_1_running_mean, &model->conv9_1_running_var); - NN_relu6(&model->conv9_2, &model->conv9_1); - NN_conv2d( + nn_relu6(&model->conv9_2, &model->conv9_1); + nn_conv2d( &model->conv9_3, &model->conv9_2, &model->conv9_3_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 
1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv9_4, &model->conv9_3, &model->conv9_4_weight, &model->conv9_4_bias, 1e-05, &model->conv9_4_running_mean, &model->conv9_4_running_var); - NN_relu6(&model->conv9_5, &model->conv9_4); - NN_conv2d( + nn_relu6(&model->conv9_5, &model->conv9_4); + nn_conv2d( &model->conv10_0, &model->conv9_5, &model->conv10_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){1, 1}, (size_t[]){1, 1}, 288); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv10_1, &model->conv10_0, &model->conv10_1_weight, &model->conv10_1_bias, 1e-05, &model->conv10_1_running_mean, &model->conv10_1_running_var); - NN_relu6(&model->conv10_2, &model->conv10_1); - NN_conv2d( + nn_relu6(&model->conv10_2, &model->conv10_1); + nn_conv2d( &model->conv10_3, &model->conv10_2, &model->conv10_3_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv10_4, &model->conv10_3, &model->conv10_4_weight, &model->conv10_4_bias, 1e-05, &model->conv10_4_running_mean, &model->conv10_4_running_var); - NN_relu6(&model->conv10_5, &model->conv10_4); - NN_conv2d( + nn_relu6(&model->conv10_5, &model->conv10_4); + nn_conv2d( &model->conv11_0, &model->conv10_5, &model->conv11_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){1, 1}, (size_t[]){1, 1}, 296); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv11_1, &model->conv11_0, &model->conv11_1_weight, &model->conv11_1_bias, 1e-05, &model->conv11_1_running_mean, &model->conv11_1_running_var); - NN_relu6(&model->conv11_2, &model->conv11_1); - NN_conv2d( + nn_relu6(&model->conv11_2, &model->conv11_1); + nn_conv2d( &model->conv11_3, &model->conv11_2, &model->conv11_3_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv11_4, &model->conv11_3, &model->conv11_4_weight, &model->conv11_4_bias, 1e-05, &model->conv11_4_running_mean, &model->conv11_4_running_var); - NN_relu6(&model->conv11_5, &model->conv11_4); - NN_conv2d( + nn_relu6(&model->conv11_5, &model->conv11_4); + nn_conv2d( &model->conv12_0, &model->conv11_5, &model->conv12_0_weight, NULL, (size_t[]){2, 2}, (size_t[]){1, 1}, (size_t[]){1, 1}, 328); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv12_1, &model->conv12_0, &model->conv12_1_weight, &model->conv12_1_bias, 1e-05, &model->conv12_1_running_mean, &model->conv12_1_running_var); - NN_relu6(&model->conv12_2, &model->conv12_1); - NN_conv2d( + nn_relu6(&model->conv12_2, &model->conv12_1); + nn_conv2d( &model->conv12_3, &model->conv12_2, &model->conv12_3_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv12_4, &model->conv12_3, &model->conv12_4_weight, &model->conv12_4_bias, 1e-05, &model->conv12_4_running_mean, &model->conv12_4_running_var); - NN_relu6(&model->conv12_5, &model->conv12_4); - NN_conv2d( + nn_relu6(&model->conv12_5, &model->conv12_4); + nn_conv2d( &model->conv13_0, &model->conv12_5, &model->conv13_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){1, 1}, (size_t[]){1, 1}, 480); - NN_batch_norm2d( + nn_batch_norm2d( &model->conv13_1, &model->conv13_0, &model->conv13_1_weight, &model->conv13_1_bias, 1e-05, &model->conv13_1_running_mean, &model->conv13_1_running_var); - NN_relu6(&model->conv13_2, &model->conv13_1); - NN_conv2d( + nn_relu6(&model->conv13_2, &model->conv13_1); + nn_conv2d( &model->conv13_3, &model->conv13_2, &model->conv13_3_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( 
&model->conv13_4, &model->conv13_3, &model->conv13_4_weight, &model->conv13_4_bias, 1e-05, &model->conv13_4_running_mean, &model->conv13_4_running_var); - NN_relu6(&model->conv13_5, &model->conv13_4); - NN_conv2d( + nn_relu6(&model->conv13_5, &model->conv13_4); + nn_conv2d( &model->decode_conv1_0_0, &model->conv13_5, &model->decode_conv1_0_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){2, 2}, (size_t[]){1, 1}, 512); - NN_batch_norm2d( + nn_batch_norm2d( &model->decode_conv1_0_1, &model->decode_conv1_0_0, &model->decode_conv1_0_1_weight, &model->decode_conv1_0_1_bias, 1e-05, &model->decode_conv1_0_1_running_mean, &model->decode_conv1_0_1_running_var); - NN_relu(&model->decode_conv1_0_2, &model->decode_conv1_0_1); - NN_conv2d( + nn_relu(&model->decode_conv1_0_2, &model->decode_conv1_0_1); + nn_conv2d( &model->decode_conv1_1_0, &model->decode_conv1_0_2, &model->decode_conv1_1_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->decode_conv1_1_1, &model->decode_conv1_1_0, &model->decode_conv1_1_1_weight, &model->decode_conv1_1_1_bias, 1e-05, &model->decode_conv1_1_1_running_mean, &model->decode_conv1_1_1_running_var); - NN_relu(&model->decode_conv1_1_2, &model->decode_conv1_1_1); + nn_relu(&model->decode_conv1_1_2, &model->decode_conv1_1_1); // F.interpolate - NN_interpolate(&model->interpolate, &model->decode_conv1_1_2, (float []){2, 2}); + nn_interpolate(&model->interpolate, &model->decode_conv1_1_2, (float []){2, 2}); - NN_conv2d( + nn_conv2d( &model->decode_conv2_0_0, &model->interpolate, &model->decode_conv2_0_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){2, 2}, (size_t[]){1, 1}, 200); - NN_batch_norm2d( + nn_batch_norm2d( &model->decode_conv2_0_1, &model->decode_conv2_0_0, &model->decode_conv2_0_1_weight, &model->decode_conv2_0_1_bias, 1e-05, &model->decode_conv2_0_1_running_mean, &model->decode_conv2_0_1_running_var); - NN_relu(&model->decode_conv2_0_2, &model->decode_conv2_0_1); - NN_conv2d( + nn_relu(&model->decode_conv2_0_2, &model->decode_conv2_0_1); + nn_conv2d( &model->decode_conv2_1_0, &model->decode_conv2_0_2, &model->decode_conv2_1_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->decode_conv2_1_1, &model->decode_conv2_1_0, &model->decode_conv2_1_1_weight, &model->decode_conv2_1_1_bias, 1e-05, &model->decode_conv2_1_1_running_mean, &model->decode_conv2_1_1_running_var); - NN_relu(&model->decode_conv2_1_2, &model->decode_conv2_1_1); + nn_relu(&model->decode_conv2_1_2, &model->decode_conv2_1_1); // F.interpolate_1 - NN_interpolate(&model->interpolate_1, &model->decode_conv2_1_2, (float []){2, 2}); + nn_interpolate(&model->interpolate_1, &model->decode_conv2_1_2, (float []){2, 2}); // F.add - NN_add(&model->add, &model->interpolate_1, &model->conv5_5); + nn_add(&model->add, &model->interpolate_1, &model->conv5_5); - NN_conv2d( + nn_conv2d( &model->decode_conv3_0_0, &model->add, &model->decode_conv3_0_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){2, 2}, (size_t[]){1, 1}, 256); - NN_batch_norm2d( + nn_batch_norm2d( &model->decode_conv3_0_1, &model->decode_conv3_0_0, &model->decode_conv3_0_1_weight, &model->decode_conv3_0_1_bias, 1e-05, &model->decode_conv3_0_1_running_mean, &model->decode_conv3_0_1_running_var); - NN_relu(&model->decode_conv3_0_2, &model->decode_conv3_0_1); - NN_conv2d( + nn_relu(&model->decode_conv3_0_2, &model->decode_conv3_0_1); + nn_conv2d( &model->decode_conv3_1_0, &model->decode_conv3_0_2, &model->decode_conv3_1_0_weight, NULL, 
(size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->decode_conv3_1_1, &model->decode_conv3_1_0, &model->decode_conv3_1_1_weight, &model->decode_conv3_1_1_bias, 1e-05, &model->decode_conv3_1_1_running_mean, &model->decode_conv3_1_1_running_var); - NN_relu(&model->decode_conv3_1_2, &model->decode_conv3_1_1); + nn_relu(&model->decode_conv3_1_2, &model->decode_conv3_1_1); // F.interpolate_2 - NN_interpolate(&model->interpolate_2, &model->decode_conv3_1_2, (float []){2, 2}); + nn_interpolate(&model->interpolate_2, &model->decode_conv3_1_2, (float []){2, 2}); // F.add_1 - NN_add(&model->add_1, &model->interpolate_2, &model->conv3_5); + nn_add(&model->add_1, &model->interpolate_2, &model->conv3_5); - NN_conv2d( + nn_conv2d( &model->decode_conv4_0_0, &model->add_1, &model->decode_conv4_0_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){2, 2}, (size_t[]){1, 1}, 120); - NN_batch_norm2d( + nn_batch_norm2d( &model->decode_conv4_0_1, &model->decode_conv4_0_0, &model->decode_conv4_0_1_weight, &model->decode_conv4_0_1_bias, 1e-05, &model->decode_conv4_0_1_running_mean, &model->decode_conv4_0_1_running_var); - NN_relu(&model->decode_conv4_0_2, &model->decode_conv4_0_1); - NN_conv2d( + nn_relu(&model->decode_conv4_0_2, &model->decode_conv4_0_1); + nn_conv2d( &model->decode_conv4_1_0, &model->decode_conv4_0_2, &model->decode_conv4_1_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->decode_conv4_1_1, &model->decode_conv4_1_0, &model->decode_conv4_1_1_weight, &model->decode_conv4_1_1_bias, 1e-05, &model->decode_conv4_1_1_running_mean, &model->decode_conv4_1_1_running_var); - NN_relu(&model->decode_conv4_1_2, &model->decode_conv4_1_1); + nn_relu(&model->decode_conv4_1_2, &model->decode_conv4_1_1); // F.interpolate_3 - NN_interpolate(&model->interpolate_3, &model->decode_conv4_1_2, (float []){2, 2}); + nn_interpolate(&model->interpolate_3, &model->decode_conv4_1_2, (float []){2, 2}); // F.add_2 - NN_add(&model->add_2, &model->interpolate_3, &model->conv1_5); + nn_add(&model->add_2, &model->interpolate_3, &model->conv1_5); - NN_conv2d( + nn_conv2d( &model->decode_conv5_0_0, &model->add_2, &model->decode_conv5_0_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){2, 2}, (size_t[]){1, 1}, 56); - NN_batch_norm2d( + nn_batch_norm2d( &model->decode_conv5_0_1, &model->decode_conv5_0_0, &model->decode_conv5_0_1_weight, &model->decode_conv5_0_1_bias, 1e-05, &model->decode_conv5_0_1_running_mean, &model->decode_conv5_0_1_running_var); - NN_relu(&model->decode_conv5_0_2, &model->decode_conv5_0_1); - NN_conv2d( + nn_relu(&model->decode_conv5_0_2, &model->decode_conv5_0_1); + nn_conv2d( &model->decode_conv5_1_0, &model->decode_conv5_0_2, &model->decode_conv5_1_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->decode_conv5_1_1, &model->decode_conv5_1_0, &model->decode_conv5_1_1_weight, &model->decode_conv5_1_1_bias, 1e-05, &model->decode_conv5_1_1_running_mean, &model->decode_conv5_1_1_running_var); - NN_relu(&model->decode_conv5_1_2, &model->decode_conv5_1_1); + nn_relu(&model->decode_conv5_1_2, &model->decode_conv5_1_1); // F.interpolate_4 - NN_interpolate(&model->interpolate_4, &model->decode_conv5_1_2, (float []){2, 2}); + nn_interpolate(&model->interpolate_4, &model->decode_conv5_1_2, (float []){2, 2}); - NN_conv2d( + nn_conv2d( &model->decode_conv6_0, &model->interpolate_4, &model->decode_conv6_0_weight, NULL, (size_t[]){1, 1}, (size_t[]){0, 
0}, (size_t[]){1, 1}, 1); - NN_batch_norm2d( + nn_batch_norm2d( &model->decode_conv6_1, &model->decode_conv6_0, &model->decode_conv6_1_weight, &model->decode_conv6_1_bias, 1e-05, &model->decode_conv6_1_running_mean, &model->decode_conv6_1_running_var); - NN_relu(&model->decode_conv6_2, &model->decode_conv6_1); + nn_relu(&model->decode_conv6_2, &model->decode_conv6_1); } diff --git a/examples/llama2/main.c b/examples/llama2/main.c index b39793a..b2e0765 100644 --- a/examples/llama2/main.c +++ b/examples/llama2/main.c @@ -96,20 +96,20 @@ typedef struct { void malloc_run_state(RunState *s, Config *p) { // we calloc instead of malloc to keep valgrind happy int kv_dim = (p->dim * p->n_kv_heads) / p->n_heads; - s->key_cache = NN_zeros(3, (size_t[]){p->n_layers, p->seq_len, kv_dim}, DTYPE_F32); - s->value_cache = NN_zeros(3, (size_t[]){p->n_layers, p->seq_len, kv_dim}, DTYPE_F32); - s->att = NN_zeros(2, (size_t[]){p->n_heads, p->seq_len}, DTYPE_F32); - s->logits = NN_zeros(1, (size_t[]){p->vocab_size}, DTYPE_F32); + s->key_cache = nn_zeros(3, (size_t[]){p->n_layers, p->seq_len, kv_dim}, DTYPE_F32); + s->value_cache = nn_zeros(3, (size_t[]){p->n_layers, p->seq_len, kv_dim}, DTYPE_F32); + s->att = nn_zeros(2, (size_t[]){p->n_heads, p->seq_len}, DTYPE_F32); + s->logits = nn_zeros(1, (size_t[]){p->vocab_size}, DTYPE_F32); - s->x = NN_zeros(1, (size_t[]){p->dim}, DTYPE_F32); - s->xb = NN_zeros(1, (size_t[]){p->dim}, DTYPE_F32); - s->xb2 = NN_zeros(1, (size_t[]){p->dim}, DTYPE_F32); - s->hb = NN_zeros(1, (size_t[]){p->hidden_dim}, DTYPE_F32); - s->hb2 = NN_zeros(1, (size_t[]){p->hidden_dim}, DTYPE_F32); + s->x = nn_zeros(1, (size_t[]){p->dim}, DTYPE_F32); + s->xb = nn_zeros(1, (size_t[]){p->dim}, DTYPE_F32); + s->xb2 = nn_zeros(1, (size_t[]){p->dim}, DTYPE_F32); + s->hb = nn_zeros(1, (size_t[]){p->hidden_dim}, DTYPE_F32); + s->hb2 = nn_zeros(1, (size_t[]){p->hidden_dim}, DTYPE_F32); - s->q = NN_tensor(1, (size_t[]){p->dim}, DTYPE_F32, NULL); - s->k = NN_tensor(1, (size_t[]){kv_dim}, DTYPE_F32, NULL); - s->v = NN_tensor(1, (size_t[]){kv_dim}, DTYPE_F32, NULL); + s->q = nn_tensor(1, (size_t[]){p->dim}, DTYPE_F32, NULL); + s->k = nn_tensor(1, (size_t[]){kv_dim}, DTYPE_F32, NULL); + s->v = nn_tensor(1, (size_t[]){kv_dim}, DTYPE_F32, NULL); } void free_run_state(RunState *s) { @@ -199,26 +199,26 @@ Tensor *forward(Transformer *transformer, int token, int pos) { float *content_row = w->token_embedding_table_ptr + token * dim; memcpy(s->x->data, content_row, dim*sizeof(float)); - s->rms_final_weight = NN_tensor(1, (size_t[]){dim}, DTYPE_F32, w->rms_ffn_weight_ptr); - s->rms_final_weight = NN_tensor(1, (size_t[]){dim}, DTYPE_F32, w->rms_att_weight_ptr); - s->wq = NN_tensor(2, (size_t[]){dim, dim}, DTYPE_F32, w->wq_ptr); - s->wk = NN_tensor(2, (size_t[]){kv_dim, dim}, DTYPE_F32, w->wk_ptr); - s->wv = NN_tensor(2, (size_t[]){kv_dim, dim}, DTYPE_F32, w->wv_ptr); - s->wo = NN_tensor(2, (size_t[]){dim, dim}, DTYPE_F32, w->wo_ptr); - s->w1 = NN_tensor(2, (size_t[]){hidden_dim, dim}, DTYPE_F32, w->w1_ptr); - s->w2 = NN_tensor(2, (size_t[]){dim, hidden_dim}, DTYPE_F32, w->w2_ptr); - s->w3 = NN_tensor(2, (size_t[]){hidden_dim, dim}, DTYPE_F32, w->w3_ptr); + s->rms_final_weight = nn_tensor(1, (size_t[]){dim}, DTYPE_F32, w->rms_ffn_weight_ptr); + s->rms_final_weight = nn_tensor(1, (size_t[]){dim}, DTYPE_F32, w->rms_att_weight_ptr); + s->wq = nn_tensor(2, (size_t[]){dim, dim}, DTYPE_F32, w->wq_ptr); + s->wk = nn_tensor(2, (size_t[]){kv_dim, dim}, DTYPE_F32, w->wk_ptr); + s->wv = nn_tensor(2, (size_t[]){kv_dim, dim}, 
DTYPE_F32, w->wv_ptr); + s->wo = nn_tensor(2, (size_t[]){dim, dim}, DTYPE_F32, w->wo_ptr); + s->w1 = nn_tensor(2, (size_t[]){hidden_dim, dim}, DTYPE_F32, w->w1_ptr); + s->w2 = nn_tensor(2, (size_t[]){dim, hidden_dim}, DTYPE_F32, w->w2_ptr); + s->w3 = nn_tensor(2, (size_t[]){hidden_dim, dim}, DTYPE_F32, w->w3_ptr); - s->rms_final_weight = NN_tensor(1, (size_t[]){p->dim}, DTYPE_F32, w->rms_final_weight_ptr); - s->wcls = NN_tensor(2, (size_t[]){p->vocab_size, p->dim}, DTYPE_F32, w->wcls_ptr); + s->rms_final_weight = nn_tensor(1, (size_t[]){p->dim}, DTYPE_F32, w->rms_final_weight_ptr); + s->wcls = nn_tensor(2, (size_t[]){p->vocab_size, p->dim}, DTYPE_F32, w->wcls_ptr); - Tensor *att_tensor = NN_tensor(2, (size_t[]){1, pos + 1}, DTYPE_F32, s->att->data); + Tensor *att_tensor = nn_tensor(2, (size_t[]){1, pos + 1}, DTYPE_F32, s->att->data); // forward all the layers for (size_t l = 0; l < p->n_layers; l += 1) { // attention rmsnorm s->rms_final_weight->data = w->rms_att_weight_ptr + l*dim; - NN_rms_norm(s->xb, s->x, s->rms_final_weight, 1e-5); + nn_rms_norm(s->xb, s->x, s->rms_final_weight, 1e-5); // key and value point to the kv cache int loff = l * p->seq_len * kv_dim; // kv cache layer offset for convenience @@ -230,9 +230,9 @@ Tensor *forward(Transformer *transformer, int token, int pos) { s->wk->data = w->wk_ptr + l*dim*kv_dim; s->wv->data = w->wv_ptr + l*dim*kv_dim; - NN_matmul(s->q, s->wq, s->xb); - NN_matmul(s->k, s->wk, s->xb); - NN_matmul(s->v, s->wv, s->xb); + nn_matmul(s->q, s->wq, s->xb); + nn_matmul(s->k, s->wk, s->xb); + nn_matmul(s->v, s->wv, s->xb); // RoPE relative positional encoding: complex-valued rotate q and k in each head @@ -274,7 +274,7 @@ Tensor *forward(Transformer *transformer, int token, int pos) { // softmax the scores to get attention weights, from 0..pos inclusively att_tensor->data = att; - NN_softmax(att_tensor, att_tensor, 1); + nn_softmax(att_tensor, att_tensor, 1); // weighted sum of the values, store back into xb float *xb = ((float *)s->xb->data) + h * head_size; @@ -293,41 +293,41 @@ Tensor *forward(Transformer *transformer, int token, int pos) { // final matmul to get the output of the attention s->wo->data = w->wo_ptr + l*dim*dim; - NN_matmul(s->xb2, s->wo, s->xb); + nn_matmul(s->xb2, s->wo, s->xb); // residual connection back into x - NN_add_inplace(s->x, s->xb2); + nn_add_inplace(s->x, s->xb2); // ffn rmsnorm s->rms_final_weight->data = w->rms_ffn_weight_ptr + l*dim; - NN_rms_norm(s->xb, s->x, s->rms_final_weight, 1e-5); + nn_rms_norm(s->xb, s->x, s->rms_final_weight, 1e-5); // Now for FFN in PyTorch we have: self.w2(F.silu(self.w1(x)) * self.w3(x)) // first calculate self.w1(x) and self.w3(x) s->w1->data = w->w1_ptr + l*dim*hidden_dim; s->w3->data = w->w3_ptr + l*dim*hidden_dim; - NN_matmul(s->hb, s->w1, s->xb); - NN_matmul(s->hb2, s->w3, s->xb); + nn_matmul(s->hb, s->w1, s->xb); + nn_matmul(s->hb2, s->w3, s->xb); // SwiGLU non-linearity // silu(x)=x*σ(x), where σ(x) is the logistic sigmoid - NN_silu(s->hb, s->hb); + nn_silu(s->hb, s->hb); // elementwise multiply with w3(x) - NN_mul_inplace(s->hb, s->hb2); + nn_mul_inplace(s->hb, s->hb2); // final matmul to get the output of the ffn s->w2->data = w->w2_ptr + l*dim*hidden_dim; - NN_matmul(s->xb, s->w2, s->hb); + nn_matmul(s->xb, s->w2, s->hb); // residual connection - NN_add_inplace(s->x, s->xb); + nn_add_inplace(s->x, s->xb); } // final rmsnorm - NN_rms_norm(s->x, s->x, s->rms_final_weight, 1e-5); + nn_rms_norm(s->x, s->x, s->rms_final_weight, 1e-5); // classifier into logits - NN_matmul(s->logits, 
s->wcls, s->x); + nn_matmul(s->logits, s->wcls, s->x); return s->logits; } @@ -683,10 +683,10 @@ int sample(Sampler *sampler, Tensor *logits) { } else { // apply the temperature to the logits - NN_mul1_inplace(logits, 1.0f / sampler->temperature); + nn_mul1_inplace(logits, 1.0f / sampler->temperature); // apply softmax to the logits to get the probabilities for next token - NN_softmax(logits, logits, 1); + nn_softmax(logits, logits, 1); // flip a (float) coin (this is our source of entropy for sampling) float coin = random_f32(&sampler->rng_state); diff --git a/examples/mlp-cartpole/main.c b/examples/mlp-cartpole/main.c index 33b9e1b..6ac4ecd 100644 --- a/examples/mlp-cartpole/main.c +++ b/examples/mlp-cartpole/main.c @@ -33,7 +33,7 @@ int main() { init(model); printf("setting input data...\n"); - NN_fill(&model->input_1, 1.0); + nn_fill(&model->input_1, 1.0); // cycles = READ_CSR("mcycle"); forward(model); @@ -44,7 +44,7 @@ int main() { // output tensor([[ 0.0258, -0.0050, 0.0902, -0.0022, -0.0924, -0.0574, 0.0328, 0.0386, -0.0277, 0.0788, 0.0603, -0.0085]]) printf("output:\n"); - NN_printf(&model->actor_6); + nn_printf(&model->actor_6); return 0; } diff --git a/examples/mlp-cartpole/model.h b/examples/mlp-cartpole/model.h index 22cdf52..378a057 100644 --- a/examples/mlp-cartpole/model.h +++ b/examples/mlp-cartpole/model.h @@ -43,44 +43,44 @@ void forward(Model *model); void init(Model *model) { float *weight_ptr = (float *)model_weight_data; - NN_init_tensor(&model->input_1, 2, (size_t[]){ 1, 48 }, DTYPE_F32, NULL); + nn_init_tensor(&model->input_1, 2, (size_t[]){ 1, 48 }, DTYPE_F32, NULL); // : actor_0 - NN_init_tensor(&model->actor_0_weight, 2, (size_t[]){ 512, 48 }, DTYPE_F32, weight_ptr); + nn_init_tensor(&model->actor_0_weight, 2, (size_t[]){ 512, 48 }, DTYPE_F32, weight_ptr); weight_ptr += 24576; - NN_init_tensor(&model->actor_0_bias, 1, (size_t[]){ 512 }, DTYPE_F32, weight_ptr); + nn_init_tensor(&model->actor_0_bias, 1, (size_t[]){ 512 }, DTYPE_F32, weight_ptr); weight_ptr += 512; - NN_init_tensor(&model->actor_0, 2, (size_t[]){ 1, 512 }, DTYPE_F32, NULL); + nn_init_tensor(&model->actor_0, 2, (size_t[]){ 1, 512 }, DTYPE_F32, NULL); // : actor_1 - NN_init_tensor(&model->actor_1, 2, (size_t[]){ 1, 512 }, DTYPE_F32, NULL); + nn_init_tensor(&model->actor_1, 2, (size_t[]){ 1, 512 }, DTYPE_F32, NULL); // : actor_2 - NN_init_tensor(&model->actor_2_weight, 2, (size_t[]){ 256, 512 }, DTYPE_F32, weight_ptr); + nn_init_tensor(&model->actor_2_weight, 2, (size_t[]){ 256, 512 }, DTYPE_F32, weight_ptr); weight_ptr += 131072; - NN_init_tensor(&model->actor_2_bias, 1, (size_t[]){ 256 }, DTYPE_F32, weight_ptr); + nn_init_tensor(&model->actor_2_bias, 1, (size_t[]){ 256 }, DTYPE_F32, weight_ptr); weight_ptr += 256; - NN_init_tensor(&model->actor_2, 2, (size_t[]){ 1, 256 }, DTYPE_F32, NULL); + nn_init_tensor(&model->actor_2, 2, (size_t[]){ 1, 256 }, DTYPE_F32, NULL); // : actor_3 - NN_init_tensor(&model->actor_3, 2, (size_t[]){ 1, 256 }, DTYPE_F32, NULL); + nn_init_tensor(&model->actor_3, 2, (size_t[]){ 1, 256 }, DTYPE_F32, NULL); // : actor_4 - NN_init_tensor(&model->actor_4_weight, 2, (size_t[]){ 128, 256 }, DTYPE_F32, weight_ptr); + nn_init_tensor(&model->actor_4_weight, 2, (size_t[]){ 128, 256 }, DTYPE_F32, weight_ptr); weight_ptr += 32768; - NN_init_tensor(&model->actor_4_bias, 1, (size_t[]){ 128 }, DTYPE_F32, weight_ptr); + nn_init_tensor(&model->actor_4_bias, 1, (size_t[]){ 128 }, DTYPE_F32, weight_ptr); weight_ptr += 128; - NN_init_tensor(&model->actor_4, 2, (size_t[]){ 1, 128 }, DTYPE_F32, 
NULL); + nn_init_tensor(&model->actor_4, 2, (size_t[]){ 1, 128 }, DTYPE_F32, NULL); // : actor_5 - NN_init_tensor(&model->actor_5, 2, (size_t[]){ 1, 128 }, DTYPE_F32, NULL); + nn_init_tensor(&model->actor_5, 2, (size_t[]){ 1, 128 }, DTYPE_F32, NULL); // : actor_6 - NN_init_tensor(&model->actor_6_weight, 2, (size_t[]){ 12, 128 }, DTYPE_F32, weight_ptr); + nn_init_tensor(&model->actor_6_weight, 2, (size_t[]){ 12, 128 }, DTYPE_F32, weight_ptr); weight_ptr += 1536; - NN_init_tensor(&model->actor_6_bias, 1, (size_t[]){ 12 }, DTYPE_F32, weight_ptr); + nn_init_tensor(&model->actor_6_bias, 1, (size_t[]){ 12 }, DTYPE_F32, weight_ptr); weight_ptr += 12; - NN_init_tensor(&model->actor_6, 2, (size_t[]){ 1, 12 }, DTYPE_F32, NULL); + nn_init_tensor(&model->actor_6, 2, (size_t[]){ 1, 12 }, DTYPE_F32, NULL); } @@ -89,13 +89,13 @@ void init(Model *model) { * Forward pass of the model */ void forward(Model *model) { - NN_linear(&model->actor_0, &model->input_1, &model->actor_0_weight, &model->actor_0_bias); - NN_elu(&model->actor_1, &model->actor_0, 1.0); - NN_linear(&model->actor_2, &model->actor_1, &model->actor_2_weight, &model->actor_2_bias); - NN_elu(&model->actor_3, &model->actor_2, 1.0); - NN_linear(&model->actor_4, &model->actor_3, &model->actor_4_weight, &model->actor_4_bias); - NN_elu(&model->actor_5, &model->actor_4, 1.0); - NN_linear(&model->actor_6, &model->actor_5, &model->actor_6_weight, &model->actor_6_bias); + nn_linear(&model->actor_0, &model->input_1, &model->actor_0_weight, &model->actor_0_bias); + nn_elu(&model->actor_1, &model->actor_0, 1.0); + nn_linear(&model->actor_2, &model->actor_1, &model->actor_2_weight, &model->actor_2_bias); + nn_elu(&model->actor_3, &model->actor_2, 1.0); + nn_linear(&model->actor_4, &model->actor_3, &model->actor_4_weight, &model->actor_4_bias); + nn_elu(&model->actor_5, &model->actor_4, 1.0); + nn_linear(&model->actor_6, &model->actor_5, &model->actor_6_weight, &model->actor_6_bias); } diff --git a/examples/mlp-ppo/model.h b/examples/mlp-ppo/model.h index a5ee9d7..4631c2d 100644 --- a/examples/mlp-ppo/model.h +++ b/examples/mlp-ppo/model.h @@ -43,44 +43,44 @@ void forward(Model *model); void init(Model *model) { float *weight_ptr = (float *)model_weight_data; - NN_init_tensor(&model->input_1, 2, (size_t[]){ 1, 123 }, DTYPE_F32, NULL); + nn_init_tensor(&model->input_1, 2, (size_t[]){ 1, 123 }, DTYPE_F32, NULL); // : _0 - NN_init_tensor(&model->_0_weight, 2, (size_t[]){ 256, 123 }, DTYPE_F32, weight_ptr); + nn_init_tensor(&model->_0_weight, 2, (size_t[]){ 256, 123 }, DTYPE_F32, weight_ptr); weight_ptr += 31488; - NN_init_tensor(&model->_0_bias, 1, (size_t[]){ 256 }, DTYPE_F32, weight_ptr); + nn_init_tensor(&model->_0_bias, 1, (size_t[]){ 256 }, DTYPE_F32, weight_ptr); weight_ptr += 256; - NN_init_tensor(&model->_0, 2, (size_t[]){ 1, 256 }, DTYPE_F32, NULL); + nn_init_tensor(&model->_0, 2, (size_t[]){ 1, 256 }, DTYPE_F32, NULL); // : _1 - NN_init_tensor(&model->_1, 2, (size_t[]){ 1, 256 }, DTYPE_F32, NULL); + nn_init_tensor(&model->_1, 2, (size_t[]){ 1, 256 }, DTYPE_F32, NULL); // : _2 - NN_init_tensor(&model->_2_weight, 2, (size_t[]){ 128, 256 }, DTYPE_F32, weight_ptr); + nn_init_tensor(&model->_2_weight, 2, (size_t[]){ 128, 256 }, DTYPE_F32, weight_ptr); weight_ptr += 32768; - NN_init_tensor(&model->_2_bias, 1, (size_t[]){ 128 }, DTYPE_F32, weight_ptr); + nn_init_tensor(&model->_2_bias, 1, (size_t[]){ 128 }, DTYPE_F32, weight_ptr); weight_ptr += 128; - NN_init_tensor(&model->_2, 2, (size_t[]){ 1, 128 }, DTYPE_F32, NULL); + nn_init_tensor(&model->_2, 2, 
(size_t[]){ 1, 128 }, DTYPE_F32, NULL);
   // : _3
-  NN_init_tensor(&model->_3, 2, (size_t[]){ 1, 128 }, DTYPE_F32, NULL);
+  nn_init_tensor(&model->_3, 2, (size_t[]){ 1, 128 }, DTYPE_F32, NULL);
   // : _4
-  NN_init_tensor(&model->_4_weight, 2, (size_t[]){ 128, 128 }, DTYPE_F32, weight_ptr);
+  nn_init_tensor(&model->_4_weight, 2, (size_t[]){ 128, 128 }, DTYPE_F32, weight_ptr);
   weight_ptr += 16384;
-  NN_init_tensor(&model->_4_bias, 1, (size_t[]){ 128 }, DTYPE_F32, weight_ptr);
+  nn_init_tensor(&model->_4_bias, 1, (size_t[]){ 128 }, DTYPE_F32, weight_ptr);
   weight_ptr += 128;
-  NN_init_tensor(&model->_4, 2, (size_t[]){ 1, 128 }, DTYPE_F32, NULL);
+  nn_init_tensor(&model->_4, 2, (size_t[]){ 1, 128 }, DTYPE_F32, NULL);
   // : _5
-  NN_init_tensor(&model->_5, 2, (size_t[]){ 1, 128 }, DTYPE_F32, NULL);
+  nn_init_tensor(&model->_5, 2, (size_t[]){ 1, 128 }, DTYPE_F32, NULL);
   // : _6
-  NN_init_tensor(&model->_6_weight, 2, (size_t[]){ 37, 128 }, DTYPE_F32, weight_ptr);
+  nn_init_tensor(&model->_6_weight, 2, (size_t[]){ 37, 128 }, DTYPE_F32, weight_ptr);
   weight_ptr += 4736;
-  NN_init_tensor(&model->_6_bias, 1, (size_t[]){ 37 }, DTYPE_F32, weight_ptr);
+  nn_init_tensor(&model->_6_bias, 1, (size_t[]){ 37 }, DTYPE_F32, weight_ptr);
   weight_ptr += 37;
-  NN_init_tensor(&model->_6, 2, (size_t[]){ 1, 37 }, DTYPE_F32, NULL);
+  nn_init_tensor(&model->_6, 2, (size_t[]){ 1, 37 }, DTYPE_F32, NULL);
 }
@@ -89,13 +89,13 @@ void init(Model *model) {
  * Forward pass of the model
  */
 void forward(Model *model) {
-  NN_linear(&model->_0, &model->input_1, &model->_0_weight, &model->_0_bias);
-  NN_elu(&model->_1, &model->_0, 1.0);
-  NN_linear(&model->_2, &model->_1, &model->_2_weight, &model->_2_bias);
-  NN_elu(&model->_3, &model->_2, 1.0);
-  NN_linear(&model->_4, &model->_3, &model->_4_weight, &model->_4_bias);
-  NN_elu(&model->_5, &model->_4, 1.0);
-  NN_linear(&model->_6, &model->_5, &model->_6_weight, &model->_6_bias);
+  nn_linear(&model->_0, &model->input_1, &model->_0_weight, &model->_0_bias);
+  nn_elu(&model->_1, &model->_0, 1.0);
+  nn_linear(&model->_2, &model->_1, &model->_2_weight, &model->_2_bias);
+  nn_elu(&model->_3, &model->_2, 1.0);
+  nn_linear(&model->_4, &model->_3, &model->_4_weight, &model->_4_bias);
+  nn_elu(&model->_5, &model->_4, 1.0);
+  nn_linear(&model->_6, &model->_5, &model->_6_weight, &model->_6_bias);
 }
diff --git a/examples/mlp/main.c b/examples/mlp/main.c
index 953b699..567f426 100644
--- a/examples/mlp/main.c
+++ b/examples/mlp/main.c
@@ -33,18 +33,20 @@ int main() {
   model_init(model);

   printf("setting input data...\n");
-  // NN_fill(&model->input_1, 1.0);
+  for (int i = 0; i < 48; i += 1) {
+    model->input_1.data[i] = 1.0;
+  }

-  // cycles = READ_CSR("mcycle");
+  cycles = READ_CSR("mcycle");
   model_forward(model);
-  // cycles = READ_CSR("mcycle") - cycles;
+  cycles = READ_CSR("mcycle") - cycles;
   printf("cycles: %lu\n", cycles);

   // output tensor([[ 0.0258, -0.0050, 0.0902, -0.0022, -0.0924, -0.0574, 0.0328, 0.0386, -0.0277, 0.0788, 0.0603, -0.0085]])
   printf("output:\n");
-  // NN_printf(&model->output);
+  nn_print_tensor2d_f32(&model->output);

   return 0;
 }
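The `examples/mlp/main.c` hunk above seeds the 1×48 input tensor by writing each float directly and re-enables the `READ_CSR("mcycle")` pair around `model_forward()`, which on RISC-V targets reads the machine cycle counter before and after the call. As a minimal sketch (assuming the tensor-level fill helper `nn_fill`, which the mlp-cartpole example uses, is also linked into this target), the explicit loop is doing the same thing as:

```c
// Hypothetical alternative to the explicit fill loop in the hunk above:
// set every element of the 1x48 float input to 1.0f, then time the forward pass.
nn_fill(&model->input_1, 1.0f);          // assumes nn_fill is available here
cycles = READ_CSR("mcycle");             // RISC-V machine cycle counter
model_forward(model);
cycles = READ_CSR("mcycle") - cycles;    // elapsed cycles for one inference
```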
diff --git a/examples/mlp/model.h b/examples/mlp/model.h
index 080cbea..2c52e1a 100644
--- a/examples/mlp/model.h
+++ b/examples/mlp/model.h
@@ -70,12 +70,12 @@ void model_init(Model* model) {
 }

 void model_forward(Model* model) {
-  NN_addmm_f32(&model->seq_0, &model->input_1, &model->seq_0_weight, &model->seq_0_bias);
-  NN_elu2d_f32(&model->seq_1, &model->seq_0, 1.0);
-  NN_addmm_f32(&model->seq_2, &model->seq_1, &model->seq_2_weight, &model->seq_2_bias);
-  NN_relu2d_f32(&model->relu, &model->seq_2);
-  NN_addmm_f32(&model->linear, &model->relu, &model->lin2_weight, &model->lin2_bias);
-  NN_relu2d_f32(&model->relu_1, &model->relu);
+  nn_addmm_f32(&model->seq_0, &model->input_1, &model->seq_0_weight, &model->seq_0_bias);
+  nn_elu2d_f32(&model->seq_1, &model->seq_0, 1.0);
+  nn_addmm_f32(&model->seq_2, &model->seq_1, &model->seq_2_weight, &model->seq_2_bias);
+  nn_relu2d_f32(&model->relu, &model->seq_2);
+  nn_addmm_f32(&model->linear, &model->relu, &model->lin2_weight, &model->lin2_bias);
+  nn_relu2d_f32(&model->relu_1, &model->relu);
   memcpy(model->output.data, model->linear.data, 48);
 }
diff --git a/examples/mnist/main.c b/examples/mnist/main.c
index 0be9248..dbd2d64 100644
--- a/examples/mnist/main.c
+++ b/examples/mnist/main.c
@@ -18,7 +18,7 @@ int main() {
   forward(&model);

   printf("Output:\n");
-  NN_printf(&model.fc3);
+  nn_printf(&model.fc3);

   return 0;
 }
\ No newline at end of file
diff --git a/examples/mnist/model.h b/examples/mnist/model.h
index 76133cb..77a6cf4 100644
--- a/examples/mnist/model.h
+++ b/examples/mnist/model.h
@@ -40,31 +40,31 @@ void forward(Model *model);
 void init(Model *model) {
   float *weight_ptr = (float *)model_weight_data;

-  NN_init_tensor(&model->x, 4, (size_t[]){ 4, 28, 28, 1 }, DTYPE_F32, NULL);
-  NN_init_tensor(&model->flatten, 2, (size_t[]){ 4, 784 }, DTYPE_F32, NULL);
+  nn_init_tensor(&model->x, 4, (size_t[]){ 4, 28, 28, 1 }, DTYPE_F32, NULL);
+  nn_init_tensor(&model->flatten, 2, (size_t[]){ 4, 784 }, DTYPE_F32, NULL);
   // : fc1
-  NN_init_tensor(&model->fc1_weight, 2, (size_t[]){ 16, 784 }, DTYPE_F32, weight_ptr);
+  nn_init_tensor(&model->fc1_weight, 2, (size_t[]){ 16, 784 }, DTYPE_F32, weight_ptr);
   weight_ptr += 12544;
-  NN_init_tensor(&model->fc1_bias, 1, (size_t[]){ 16 }, DTYPE_F32, weight_ptr);
+  nn_init_tensor(&model->fc1_bias, 1, (size_t[]){ 16 }, DTYPE_F32, weight_ptr);
   weight_ptr += 16;
-  NN_init_tensor(&model->fc1, 2, (size_t[]){ 4, 16 }, DTYPE_F32, NULL);
-  NN_init_tensor(&model->relu, 2, (size_t[]){ 4, 16 }, DTYPE_F32, NULL);
+  nn_init_tensor(&model->fc1, 2, (size_t[]){ 4, 16 }, DTYPE_F32, NULL);
+  nn_init_tensor(&model->relu, 2, (size_t[]){ 4, 16 }, DTYPE_F32, NULL);
   // : fc2
-  NN_init_tensor(&model->fc2_weight, 2, (size_t[]){ 16, 16 }, DTYPE_F32, weight_ptr);
+  nn_init_tensor(&model->fc2_weight, 2, (size_t[]){ 16, 16 }, DTYPE_F32, weight_ptr);
   weight_ptr += 256;
-  NN_init_tensor(&model->fc2_bias, 1, (size_t[]){ 16 }, DTYPE_F32, weight_ptr);
+  nn_init_tensor(&model->fc2_bias, 1, (size_t[]){ 16 }, DTYPE_F32, weight_ptr);
   weight_ptr += 16;
-  NN_init_tensor(&model->fc2, 2, (size_t[]){ 4, 16 }, DTYPE_F32, NULL);
-  NN_init_tensor(&model->relu_1, 2, (size_t[]){ 4, 16 }, DTYPE_F32, NULL);
+  nn_init_tensor(&model->fc2, 2, (size_t[]){ 4, 16 }, DTYPE_F32, NULL);
+  nn_init_tensor(&model->relu_1, 2, (size_t[]){ 4, 16 }, DTYPE_F32, NULL);
   // : fc3
-  NN_init_tensor(&model->fc3_weight, 2, (size_t[]){ 10, 16 }, DTYPE_F32, weight_ptr);
+  nn_init_tensor(&model->fc3_weight, 2, (size_t[]){ 10, 16 }, DTYPE_F32, weight_ptr);
   weight_ptr += 160;
-  NN_init_tensor(&model->fc3_bias, 1, (size_t[]){ 10 }, DTYPE_F32, weight_ptr);
+  nn_init_tensor(&model->fc3_bias, 1, (size_t[]){ 10 }, DTYPE_F32, weight_ptr);
   weight_ptr += 10;
-  NN_init_tensor(&model->fc3, 2, (size_t[]){ 4, 10 }, DTYPE_F32, NULL);
+  nn_init_tensor(&model->fc3, 2, (size_t[]){ 4, 10 }, DTYPE_F32, NULL);
 }
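The generated `init()` functions in these examples all follow the same weight-mapping convention: parameters are packed back-to-back in a flat weight blob, each `nn_init_tensor()` call with a non-NULL data pointer creates a view into that blob (no copy), and the cursor then advances by the element count of the tensor just mapped. A small sketch of the pattern, using the fc1 shapes from the mnist hunk above (the hard-coded stride 12544 is just 16 × 784):

```c
// Map fc1's weight and bias as views into the packed weight data, then
// advance the cursor by each tensor's element count.
float *weight_ptr = (float *)model_weight_data;
nn_init_tensor(&model->fc1_weight, 2, (size_t[]){ 16, 784 }, DTYPE_F32, weight_ptr);
weight_ptr += 16 * 784;   // 12544 floats, matching the generated stride
nn_init_tensor(&model->fc1_bias, 1, (size_t[]){ 16 }, DTYPE_F32, weight_ptr);
weight_ptr += 16;
```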
&model->fc1_weight, &model->fc1_bias); - NN_relu(&model->relu, &model->fc1); - NN_linear(&model->fc2, &model->relu, &model->fc2_weight, &model->fc2_bias); - NN_relu(&model->relu_1, &model->fc2); - NN_linear(&model->fc3, &model->relu_1, &model->fc3_weight, &model->fc3_bias); + nn_linear(&model->fc1, &model->flatten, &model->fc1_weight, &model->fc1_bias); + nn_relu(&model->relu, &model->fc1); + nn_linear(&model->fc2, &model->relu, &model->fc2_weight, &model->fc2_bias); + nn_relu(&model->relu_1, &model->fc2); + nn_linear(&model->fc3, &model->relu_1, &model->fc3_weight, &model->fc3_bias); } diff --git a/examples/simple/main.c b/examples/simple/main.c index 0693fe4..1c72f82 100644 --- a/examples/simple/main.c +++ b/examples/simple/main.c @@ -30,27 +30,27 @@ Tensor D; * Initialize the required tensors for the model */ void init(Tensor *A, Tensor *B, Tensor *C, Tensor *D) { - NN_init_tensor(A, 2, (size_t[]){3, 3}, DTYPE_F32, (float *)malloc(9 * sizeof(float))); - NN_init_tensor(B, 2, (size_t[]){3, 3}, DTYPE_F32, (float *)(weights_data + 3 * sizeof(float))); - NN_init_tensor(C, 2, (size_t[]){3, 3}, DTYPE_F32, (float *)malloc(9 * sizeof(float))); - NN_init_tensor(D, 1, (size_t[]){3}, DTYPE_F32, (float *)(weights_data + 0 * sizeof(float))); + nn_init_tensor(A, 2, (size_t[]){3, 3}, DTYPE_F32, (float *)malloc(9 * sizeof(float))); + nn_init_tensor(B, 2, (size_t[]){3, 3}, DTYPE_F32, (float *)(weights_data + 3 * sizeof(float))); + nn_init_tensor(C, 2, (size_t[]){3, 3}, DTYPE_F32, (float *)malloc(9 * sizeof(float))); + nn_init_tensor(D, 1, (size_t[]){3}, DTYPE_F32, (float *)(weights_data + 0 * sizeof(float))); } /** * Deinitialize the tensors used for the model */ void deinit(Tensor *A, Tensor *B, Tensor *C, Tensor *D) { - NN_freeTensor(A); - NN_freeTensor(B); - NN_freeTensor(C); - NN_freeTensor(D); + nn_freeTensor(A); + nn_freeTensor(B); + nn_freeTensor(C); + nn_freeTensor(D); } /** * Forward pass of the model */ void forward(Tensor *C, Tensor *A, Tensor *B, Tensor *D) { - NN_Linear_F32(C, A, B, D); + nn_Linear_F32(C, A, B, D); } @@ -68,16 +68,16 @@ int main() { forward(&C, &A, &B, &D); printf("A:\n"); - NN_printf(&A); + nn_printf(&A); printf("B:\n"); - NN_printf(&B); + nn_printf(&B); printf("C:\n"); - NN_printf(&C); + nn_printf(&C); printf("D:\n"); - NN_printf(&D); + nn_printf(&D); deinit(&A, &B, &C, &D); diff --git a/examples/stereo-block-matching/main.c b/examples/stereo-block-matching/main.c index a9bb03f..3d9c15a 100644 --- a/examples/stereo-block-matching/main.c +++ b/examples/stereo-block-matching/main.c @@ -60,15 +60,15 @@ Tensor* compute_dispartiy(Tensor *left, Tensor *right, int min_disparity, int ma int sad_iop = 0; - Tensor *disparity_img = NN_zeros(4, (const size_t[]){1, s_h, s_w, 1}, DTYPE_U8); + Tensor *disparity_img = nn_zeros(4, (const size_t[]){1, s_h, s_w, 1}, DTYPE_U8); - Tensor *left_block = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U8, (uint8_t *)left->data); - Tensor *right_block = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U8, (uint8_t *)right->data); - Tensor *left_block_signed = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U32, NULL); - Tensor *right_block_signed = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U32, NULL); - Tensor *diff = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U8, NULL); - Tensor *diff_wide = NN_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_I32, NULL); - Tensor *out = NN_tensor(1, (const size_t[]){1}, DTYPE_I32, NULL); + Tensor *left_block = nn_tensor(2, (const size_t[]){1, 
2*half_block_size}, DTYPE_U8, (uint8_t *)left->data); + Tensor *right_block = nn_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U8, (uint8_t *)right->data); + Tensor *left_block_signed = nn_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U32, NULL); + Tensor *right_block_signed = nn_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U32, NULL); + Tensor *diff = nn_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_U8, NULL); + Tensor *diff_wide = nn_tensor(2, (const size_t[]){1, 2*half_block_size}, DTYPE_I32, NULL); + Tensor *out = nn_tensor(1, (const size_t[]){1}, DTYPE_I32, NULL); // compute disparity // outer loop iterating over blocks @@ -100,15 +100,15 @@ Tensor* compute_dispartiy(Tensor *left, Tensor *right, int min_disparity, int ma left_block->data = ((uint8_t *)left->data) + row*width + col; right_block->data = ((uint8_t *)right->data) + row*width + col + offset; - NN_sub(diff, left_block, right_block); + nn_sub(diff, left_block, right_block); diff->dtype = DTYPE_I8; - NN_copy(diff_wide, diff); + nn_copy(diff_wide, diff); diff->dtype = DTYPE_U8; - NN_abs_inplace(diff_wide); + nn_abs_inplace(diff_wide); - NN_sum(out, diff_wide); + nn_sum(out, diff_wide); SAD += ((int32_t *)out->data)[0]; } // reduction step @@ -121,16 +121,16 @@ Tensor* compute_dispartiy(Tensor *left, Tensor *right, int min_disparity, int ma } } - NN_free_tensor_data(left_block_signed); - NN_free_tensor_data(right_block_signed); - NN_free_tensor_data(diff); - NN_free_tensor_data(out); - NN_delete_tensor(left_block_signed); - NN_delete_tensor(right_block_signed); - NN_delete_tensor(diff); - NN_delete_tensor(out); - NN_delete_tensor(left_block); - NN_delete_tensor(right_block); + nn_free_tensor_data(left_block_signed); + nn_free_tensor_data(right_block_signed); + nn_free_tensor_data(diff); + nn_free_tensor_data(out); + nn_delete_tensor(left_block_signed); + nn_delete_tensor(right_block_signed); + nn_delete_tensor(diff); + nn_delete_tensor(out); + nn_delete_tensor(left_block); + nn_delete_tensor(right_block); printf("SAD IOPs: %d\n", sad_iop); @@ -141,8 +141,8 @@ int main() { file_size = (size_t)left_end - (size_t)left_start; - Tensor *left_image = NN_tensor(4, (const size_t[]){1, IMG_HEIGHT, IMG_WIDTH, 1}, DTYPE_U8, left_data); - Tensor *right_image = NN_tensor(4, (const size_t[]){1, IMG_HEIGHT, IMG_WIDTH, 1}, DTYPE_U8, right_data); + Tensor *left_image = nn_tensor(4, (const size_t[]){1, IMG_HEIGHT, IMG_WIDTH, 1}, DTYPE_U8, left_data); + Tensor *right_image = nn_tensor(4, (const size_t[]){1, IMG_HEIGHT, IMG_WIDTH, 1}, DTYPE_U8, right_data); size_t cycles = READ_CSR("cycle"); Tensor *disparity_img = compute_dispartiy(left_image, right_image, 0, 32, 4); @@ -153,11 +153,11 @@ int main() { // Save the disparity image printf("Result:\n"); - NN_print_shape(disparity_img); + nn_print_shape(disparity_img); printf("\n"); - Tensor *disparity_img_small = NN_zeros(4, (const size_t[]){1, disparity_img->shape[1] / 4, disparity_img->shape[2] / 2, 1}, DTYPE_U8); - NN_interpolate(disparity_img_small, disparity_img, (float []){0.25, 0.5}); + Tensor *disparity_img_small = nn_zeros(4, (const size_t[]){1, disparity_img->shape[1] / 4, disparity_img->shape[2] / 2, 1}, DTYPE_U8); + nn_interpolate(disparity_img_small, disparity_img, (float []){0.25, 0.5}); show_ASCII_image(disparity_img_small, 0, 32); diff --git a/nn/nn.h b/nn/nn.h index 4937b1c..0c6e7a3 100644 --- a/nn/nn.h +++ b/nn/nn.h @@ -54,7 +54,7 @@ typedef struct { } Tensor2D_F32; -static inline void NN_assert(int condition, char *message) { +static inline 
void nn_assert(int condition, char *message) { if (!condition) { printf("Assertion failed: "); printf("%s\n", message); @@ -69,119 +69,119 @@ static inline uint8_t float_equal(float golden, float actual, float rel_err) { -void NN_print_u8(uint8_t v); +void nn_print_u8(uint8_t v); -void NN_print_i8(int8_t v); +void nn_print_i8(int8_t v); -void NN_print_u16(uint16_t v); +void nn_print_u16(uint16_t v); -void NN_print_i16(int16_t v); +void nn_print_i16(int16_t v); -void NN_print_u32(uint32_t v); +void nn_print_u32(uint32_t v); -void NN_print_i32(int32_t v); +void nn_print_i32(int32_t v); -void NN_print_f16(float16_t v, int16_t num_digits); +void nn_print_f16(float16_t v, int16_t num_digits); -void NN_print_f32(float v, int16_t num_digits); +void nn_print_f32(float v, int16_t num_digits); -void NN_print_shape(size_t ndim, const size_t *shape); +void nn_print_shape(size_t ndim, const size_t *shape); -void NN_print_tensor1d_f16(const Tensor1D_F16 *tensor); +void nn_print_tensor1d_f16(const Tensor1D_F16 *tensor); -void NN_print_tensor1d_f32(const Tensor1D_F32 *tensor); +void nn_print_tensor1d_f32(const Tensor1D_F32 *tensor); -void NN_print_tensor2d_f16(const Tensor2D_F16 *tensor); +void nn_print_tensor2d_f16(const Tensor2D_F16 *tensor); -void NN_print_tensor2d_f32(const Tensor2D_F32 *tensor); +void nn_print_tensor2d_f32(const Tensor2D_F32 *tensor); -uint8_t NN_equals0d_f16(const Tensor0D_F16 *a, const Tensor0D_F16 *b, float rel_err); +uint8_t nn_equals0d_f16(const Tensor0D_F16 *a, const Tensor0D_F16 *b, float rel_err); -uint8_t NN_equals0d_f32(const Tensor0D_F32 *a, const Tensor0D_F32 *b, float rel_err); +uint8_t nn_equals0d_f32(const Tensor0D_F32 *a, const Tensor0D_F32 *b, float rel_err); -uint8_t NN_equals1d_f16(const Tensor1D_F16 *a, const Tensor1D_F16 *b, float rel_err); +uint8_t nn_equals1d_f16(const Tensor1D_F16 *a, const Tensor1D_F16 *b, float rel_err); -uint8_t NN_equals1d_f32(const Tensor1D_F32 *a, const Tensor1D_F32 *b, float rel_err); +uint8_t nn_equals1d_f32(const Tensor1D_F32 *a, const Tensor1D_F32 *b, float rel_err); -uint8_t NN_equals2d_f16(const Tensor2D_F16 *a, const Tensor2D_F16 *b, float rel_err); +uint8_t nn_equals2d_f16(const Tensor2D_F16 *a, const Tensor2D_F16 *b, float rel_err); -uint8_t NN_equals2d_f32(const Tensor2D_F32 *a, const Tensor2D_F32 *b, float rel_err); +uint8_t nn_equals2d_f32(const Tensor2D_F32 *a, const Tensor2D_F32 *b, float rel_err); -void NN_add1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, const Tensor1D_F16 *x2); +void nn_add1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, const Tensor1D_F16 *x2); -void NN_add1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2); +void nn_add1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2); -void NN_add2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2); +void nn_add2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2); -void NN_add2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2); +void nn_add2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2); -void NN_addscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x, float16_t scalar); +void nn_addscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x, float16_t scalar); -void NN_addscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar); +void nn_addscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar); -void NN_addscalar2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float16_t scalar); +void nn_addscalar2d_f16(Tensor2D_F16 *y, const 
Tensor2D_F16 *x, float16_t scalar); -void NN_addscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar); +void nn_addscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar); -void NN_addmm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, const Tensor2D_F16 *weight, const Tensor1D_F16 *bias); +void nn_addmm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, const Tensor2D_F16 *weight, const Tensor1D_F16 *bias); -void NN_addmm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, const Tensor2D_F32 *weight, const Tensor1D_F32 *bias); +void nn_addmm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, const Tensor2D_F32 *weight, const Tensor1D_F32 *bias); -void NN_elu2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float alpha); +void nn_elu2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float alpha); -void NN_elu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float alpha); +void nn_elu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float alpha); -void NN_relu2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x); +void nn_relu2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x); -void NN_relu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x); +void nn_relu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x); -void NN_max1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x); +void nn_max1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x); -void NN_max1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x); +void nn_max1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x); -void NN_max2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x); +void nn_max2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x); -void NN_max2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x); +void nn_max2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x); -void NN_min1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x); +void nn_min1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x); -void NN_min1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x); +void nn_min1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x); -void NN_min2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x); +void nn_min2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x); -void NN_min2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x); +void nn_min2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x); -void NN_mm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2); +void nn_mm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2); -void NN_mm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2); +void nn_mm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2); -void NN_mul1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, const Tensor1D_F16 *x2); +void nn_mul1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, const Tensor1D_F16 *x2); -void NN_mul1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2); +void nn_mul1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2); -void NN_mul2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2); +void nn_mul2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2); -void NN_mul2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2); +void nn_mul2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2); -void NN_mulscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x, float16_t scalar); +void nn_mulscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x, float16_t scalar); -void NN_mulscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar); +void nn_mulscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar); -void NN_mulscalar2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, 
float16_t scalar); +void nn_mulscalar2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float16_t scalar); -void NN_mulscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar); +void nn_mulscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar); diff --git a/nn/nn_math.h b/nn/nn_math.h index 75451b9..e82c9fe 100644 --- a/nn/nn_math.h +++ b/nn/nn_math.h @@ -14,7 +14,7 @@ -// inline static void NN_mad_F32(const int n, float *y, const float *x, const float v) { +// inline static void nn_mad_F32(const int n, float *y, const float *x, const float v) { // #if defined(GGML_SIMD) // const int np = (n & ~(GGML_F32_STEP - 1)); @@ -45,7 +45,7 @@ // #endif // } -// inline static void NN_mad_f16(const int n, float16_t *y, const float16_t *x, const float v) { +// inline static void nn_mad_f16(const int n, float16_t *y, const float16_t *x, const float v) { // #if defined(GGML_SIMD) // const int np = (n & ~(GGML_F16_STEP - 1)); @@ -77,7 +77,7 @@ // } // // xs and vs are byte strides of x and v -// inline static void NN_mad_F32_unroll(const int n, const int xs, const int vs, float *restrict y, const float *restrict xv, const float *restrict vv) { +// inline static void nn_mad_F32_unroll(const int n, const int xs, const int vs, float *restrict y, const float *restrict xv, const float *restrict vv) { // const float *restrict x[GGML_VEC_MAD_UNROLL]; // const float *restrict v[GGML_VEC_MAD_UNROLL]; @@ -128,15 +128,15 @@ // #endif // } -// inline static void NN_step_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = (x[i] > 0.f) ? 1.f : 0.f; } -// inline static void NN_tanh_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = tanhf(x[i]); } -// inline static void NN_elu_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; } -// inline static void NN_relu_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = (x[i] > 0.f) ? x[i] : 0.f; } -// inline static void NN_leaky_relu_F32 (const int n, float *y, const float *x, const float ns) { for (int i = 0; i < n; i += 1) y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.0f) ? x[i] : 0.f); } -// inline static void NN_sigmoid_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = 1.f / (1.f + expf(-x[i])); } +// inline static void nn_step_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = (x[i] > 0.f) ? 1.f : 0.f; } +// inline static void nn_tanh_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = tanhf(x[i]); } +// inline static void nn_elu_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; } +// inline static void nn_relu_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = (x[i] > 0.f) ? x[i] : 0.f; } +// inline static void nn_leaky_relu_F32 (const int n, float *y, const float *x, const float ns) { for (int i = 0; i < n; i += 1) y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.0f) ? 
x[i] : 0.f); } +// inline static void nn_sigmoid_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = 1.f / (1.f + expf(-x[i])); } // // TODO: optimize performance -// inline static void NN_hardswish_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = x[i] * fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); } -// inline static void NN_hardsigmoid_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); } +// inline static void nn_hardswish_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = x[i] * fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); } +// inline static void nn_hardsigmoid_F32 (const int n, float *y, const float *x) { for (int i = 0; i < n; i += 1) y[i] = fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); } // static const float GELU_COEF_A = 0.044715f; // static const float GELU_QUICK_COEF = -1.702f; @@ -146,7 +146,7 @@ // return 0.5f*x*(1.0f + tanhf(SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x))); // } -// inline static void NN_gelu_f16(const int n, float16_t * y, const float16_t * x) { +// inline static void nn_gelu_f16(const int n, float16_t * y, const float16_t * x) { // const uint16_t * i16 = (const uint16_t *) x; // for (int i = 0; i < n; i += 1) { // y[i] = ggml_table_gelu_f16[i16[i]]; @@ -154,7 +154,7 @@ // } // #ifdef GGML_GELU_FP16 -// inline static void NN_gelu_F32(const int n, float *y, const float *x) { +// inline static void nn_gelu_F32(const int n, float *y, const float *x) { // uint16_t t; // for (int i = 0; i < n; i += 1) { // if (x[i] <= -10.0f) { @@ -169,7 +169,7 @@ // } // } // #else -// inline static void NN_gelu_F32(const int n, float *y, const float *x) { +// inline static void nn_gelu_F32(const int n, float *y, const float *x) { // for (int i = 0; i < n; i += 1) { // y[i] = ggml_gelu_F32(x[i]); // } @@ -180,7 +180,7 @@ // return x*(1.0f/(1.0f+expf(GELU_QUICK_COEF*x))); // } -// //inline static void NN_gelu_quick_f16(const int n, float16_t * y, const float16_t * x) { +// //inline static void nn_gelu_quick_f16(const int n, float16_t * y, const float16_t * x) { // // const uint16_t * i16 = (const uint16_t *) x; // // for (int i = 0; i < n; i += 1) { // // y[i] = ggml_table_gelu_quick_f16[i16[i]]; @@ -188,7 +188,7 @@ // //} // #ifdef GGML_GELU_QUICK_FP16 -// inline static void NN_gelu_quick_F32(const int n, float *y, const float *x) { +// inline static void nn_gelu_quick_F32(const int n, float *y, const float *x) { // uint16_t t; // for (int i = 0; i < n; i += 1) { // float16_t fp16 = GGML_FP32_TO_FP16(x[i]); @@ -197,7 +197,7 @@ // } // } // #else -// inline static void NN_gelu_quick_F32(const int n, float *y, const float *x) { +// inline static void nn_gelu_quick_F32(const int n, float *y, const float *x) { // for (int i = 0; i < n; i += 1) { // y[i] = ggml_gelu_quick_F32(x[i]); // } @@ -407,7 +407,7 @@ // #endif // __ARM_NEON / __AVX2__ / __SSE2__ -// static void NN_silu_F32(const int n, float *y, const float *x) { +// static void nn_silu_F32(const int n, float *y, const float *x) { // int i = 0; // #if defined(__AVX512F__) && defined(__AVX512DQ__) // for (; i + 15 < n; i += 16) { @@ -437,12 +437,12 @@ // return dy*s*(1.0f + x*(1.0f - s)); // } -// inline static void NN_silu_backward_F32(const int n, float *dx, const float *x, const float *dy) { +// inline static void nn_silu_backward_F32(const int n, float *dx, const float *x, const float *dy) { // for (int i = 0; i < n; i += 1) { // dx[i] = 
ggml_silu_backward_F32(x[i], dy[i]); // } // } -// inline static void NN_argmax_F32(const int n, int * s, const float *x) { +// inline static void nn_argmax_F32(const int n, int * s, const float *x) { // float max = -INFINITY; // int idx = 0; // for (int i = 0; i < n; i += 1) { diff --git a/nn/nn_todo b/nn/nn_todo index fa6514d..6d553fe 100644 --- a/nn/nn_todo +++ b/nn/nn_todo @@ -2,13 +2,13 @@ /* * ====== Math Functions ====== */ -void NN_initMatrix(Matrix *m, size_t rows, size_t cols) { +void nn_initMatrix(Matrix *m, size_t rows, size_t cols) { m->rows = rows; m->cols = cols; m->data = malloc(rows * cols * sizeof(float)); } -void NN_concatenate(Matrix *out, Matrix *a, Matrix *b) { +void nn_concatenate(Matrix *out, Matrix *a, Matrix *b) { for (size_t i = 0; i < a->cols; i += 1) { out->data[i] = a->data[i]; } @@ -17,7 +17,7 @@ void NN_concatenate(Matrix *out, Matrix *a, Matrix *b) { } } -size_t NN_argmax(Matrix *a) { +size_t nn_argmax(Matrix *a) { int max_index = 0; float max_value = a->data[0]; for (size_t i = 1; i < a->cols; i += 1) { @@ -33,7 +33,7 @@ size_t NN_argmax(Matrix *a) { * ====== Operators ====== */ -void NN_logSoftmax(Matrix *out, Matrix *a) { +void nn_logSoftmax(Matrix *out, Matrix *a) { float sum = 0; for (size_t i = 0; i < a->cols; i += 1) { sum += exp(a->data[i]); diff --git a/src/avx/abs.c b/src/avx/abs.c index e7e015a..1a526a8 100644 --- a/src/avx/abs.c +++ b/src/avx/abs.c @@ -5,7 +5,7 @@ #ifdef AVX -// void NN_abs_f32(size_t n, float *result, float *x, size_t incx) { +// void nn_abs_f32(size_t n, float *result, float *x, size_t incx) { // // Mask to clear the sign bit // __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x7FFFFFFF)); diff --git a/src/cpu/add.c b/src/cpu/add.c index e6a44b5..a225ff6 100644 --- a/src/cpu/add.c +++ b/src/cpu/add.c @@ -1,9 +1,9 @@ #include "nn.h" -__attribute__((weak)) void NN_add1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, const Tensor1D_F16 *x2) { - NN_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_add1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, const Tensor1D_F16 *x2) { + nn_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes"); size_t n = y->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -11,9 +11,9 @@ __attribute__((weak)) void NN_add1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, } } -__attribute__((weak)) void NN_add1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2) { - NN_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_add1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2) { + nn_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes"); size_t n = y->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -21,9 +21,9 @@ __attribute__((weak)) void NN_add1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, } } -__attribute__((weak)) void NN_add2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2) { - NN_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0] && y->shape[1] == 
x1->shape[1], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_add2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2) { + nn_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes"); size_t n = y->shape[0] * y->shape[1]; for (size_t i = 0; i < n; i += 1) { @@ -31,9 +31,9 @@ __attribute__((weak)) void NN_add2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, } } -__attribute__((weak)) void NN_add2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) { - NN_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_add2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) { + nn_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes"); size_t n = y->shape[0] * y->shape[1]; for (size_t i = 0; i < n; i += 1) { diff --git a/src/cpu/addmm.c b/src/cpu/addmm.c index 9cc887d..6c01a64 100644 --- a/src/cpu/addmm.c +++ b/src/cpu/addmm.c @@ -1,10 +1,10 @@ #include "nn.h" -__attribute__((weak)) void NN_addmm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, const Tensor2D_F16 *weight, const Tensor1D_F16 *bias) { - NN_assert(x->shape[1] == weight->shape[1], "Cannot perform Linear on tensors of different shapes"); - NN_assert(bias->shape[0] == weight->shape[0], "Cannot perform Linear on tensors of different shapes"); - NN_assert(y->shape[0] == x->shape[0] && y->shape[1] == weight->shape[0], "Cannot perform Linear on tensors of different shapes"); +__attribute__((weak)) void nn_addmm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, const Tensor2D_F16 *weight, const Tensor1D_F16 *bias) { + nn_assert(x->shape[1] == weight->shape[1], "Cannot perform Linear on tensors of different shapes"); + nn_assert(bias->shape[0] == weight->shape[0], "Cannot perform Linear on tensors of different shapes"); + nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == weight->shape[0], "Cannot perform Linear on tensors of different shapes"); const size_t batch_size = x->shape[0]; const size_t in_features = x->shape[1]; @@ -22,10 +22,10 @@ __attribute__((weak)) void NN_addmm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, } -__attribute__((weak)) void NN_addmm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, const Tensor2D_F32 *weight, const Tensor1D_F32 *bias) { - NN_assert(x->shape[1] == weight->shape[1], "Cannot perform Linear on tensors of different shapes"); - NN_assert(bias->shape[0] == weight->shape[0], "Cannot perform Linear on tensors of different shapes"); - NN_assert(y->shape[0] == x->shape[0] && y->shape[1] == weight->shape[0], "Cannot perform Linear on tensors of different shapes"); +__attribute__((weak)) void nn_addmm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, const Tensor2D_F32 *weight, const Tensor1D_F32 *bias) { + nn_assert(x->shape[1] == weight->shape[1], "Cannot perform Linear on tensors of different shapes"); + nn_assert(bias->shape[0] == weight->shape[0], "Cannot perform Linear on tensors of different shapes"); + nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == weight->shape[0], "Cannot perform Linear on tensors of 
different shapes"); const size_t batch_size = x->shape[0]; const size_t in_features = x->shape[1]; diff --git a/src/cpu/addscalar.c b/src/cpu/addscalar.c index b6e6afb..30fa8d4 100644 --- a/src/cpu/addscalar.c +++ b/src/cpu/addscalar.c @@ -1,8 +1,8 @@ #include "nn.h" -__attribute__((weak)) void NN_addscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x, float16_t scalar) { - NN_assert(y->shape[0] == x->shape[0], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_addscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x, float16_t scalar) { + nn_assert(y->shape[0] == x->shape[0], "Cannot add tensors of different shapes"); size_t n = y->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -10,8 +10,8 @@ __attribute__((weak)) void NN_addscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F1 } } -__attribute__((weak)) void NN_addscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) { - NN_assert(y->shape[0] == x->shape[0], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_addscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) { + nn_assert(y->shape[0] == x->shape[0], "Cannot add tensors of different shapes"); size_t n = y->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -19,8 +19,8 @@ __attribute__((weak)) void NN_addscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F3 } } -__attribute__((weak)) void NN_addscalar2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float16_t scalar) { - NN_assert(y->shape[0] == x->shape[0] && y->shape[1] == x->shape[1], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_addscalar2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float16_t scalar) { + nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == x->shape[1], "Cannot add tensors of different shapes"); size_t n = y->shape[0] * y->shape[1]; for (size_t i = 0; i < n; i += 1) { @@ -28,8 +28,8 @@ __attribute__((weak)) void NN_addscalar2d_f16(Tensor2D_F16 *y, const Tensor2D_F1 } } -__attribute__((weak)) void NN_addscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar) { - NN_assert(y->shape[0] == x->shape[0] && y->shape[1] == x->shape[1], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_addscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar) { + nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == x->shape[1], "Cannot add tensors of different shapes"); size_t n = y->shape[0] * y->shape[1]; for (size_t i = 0; i < n; i += 1) { diff --git a/src/cpu/dot.c b/src/cpu/dot.c index 6a764b4..56aef32 100644 --- a/src/cpu/dot.c +++ b/src/cpu/dot.c @@ -1,7 +1,7 @@ #include "ops/dot.h" -__attribute__((weak)) void NN_dot_i8_to_i32(size_t n, int32_t *r, const int8_t *x, size_t incx, const int8_t *y, size_t incy) { +__attribute__((weak)) void nn_dot_i8_to_i32(size_t n, int32_t *r, const int8_t *x, size_t incx, const int8_t *y, size_t incy) { int32_t sum = 0; for (size_t i = 0; i < n; i += 1) { sum += x[i * incx] * y[i * incy]; @@ -9,7 +9,7 @@ __attribute__((weak)) void NN_dot_i8_to_i32(size_t n, int32_t *r, const int8_t * *r = sum; } -__attribute__((weak)) void NN_dot_i16_to_i32(size_t n, int32_t *r, const int16_t *x, size_t incx, const int16_t *y, size_t incy) { +__attribute__((weak)) void nn_dot_i16_to_i32(size_t n, int32_t *r, const int16_t *x, size_t incx, const int16_t *y, size_t incy) { int32_t sum = 0; for (size_t i = 0; i < n; i += 1) { sum += x[i * incx] * y[i * incy]; @@ -17,7 +17,7 @@ __attribute__((weak)) void NN_dot_i16_to_i32(size_t n, int32_t *r, const int16_t *r = sum; } 
-__attribute__((weak)) void NN_dot_i32(size_t n, int32_t *r, const int32_t *x, size_t incx, const int32_t *y, size_t incy) { +__attribute__((weak)) void nn_dot_i32(size_t n, int32_t *r, const int32_t *x, size_t incx, const int32_t *y, size_t incy) { int32_t sum = 0; for (size_t i = 0; i < n; i += 1) { sum += x[i * incx] * y[i * incy]; @@ -25,15 +25,15 @@ __attribute__((weak)) void NN_dot_i32(size_t n, int32_t *r, const int32_t *x, si *r = sum; } -__attribute__((weak)) void NN_dot_f16(size_t n, float16_t *r, const float16_t *x, size_t incx, const float16_t *y, size_t incy) { +__attribute__((weak)) void nn_dot_f16(size_t n, float16_t *r, const float16_t *x, size_t incx, const float16_t *y, size_t incy) { float sum_f32 = 0; for (size_t i = 0; i < n; i += 1) { - sum_f32 += NN_half_to_float(x[i * incx]) * NN_half_to_float(y[i * incy]); + sum_f32 += nn_half_to_float(x[i * incx]) * nn_half_to_float(y[i * incy]); } - *r = NN_float_to_half(sum_f32); + *r = nn_float_to_half(sum_f32); } -__attribute__((weak)) void NN_dot_f32(size_t n, float *r, const float *x, size_t incx, const float *y, size_t incy) { +__attribute__((weak)) void nn_dot_f32(size_t n, float *r, const float *x, size_t incx, const float *y, size_t incy) { float sum = 0.0; for (size_t i = 0; i < n; i += 1) { sum += x[i * incx] * y[i * incy]; diff --git a/src/cpu/elu.c b/src/cpu/elu.c index 2ec952f..4254d46 100644 --- a/src/cpu/elu.c +++ b/src/cpu/elu.c @@ -1,8 +1,8 @@ #include "nn.h" -__attribute__((weak)) void NN_elu2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float alpha) { - NN_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ELU on tensors of different shapes"); +__attribute__((weak)) void nn_elu2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float alpha) { + nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ELU on tensors of different shapes"); const size_t n = y->shape[0] * y->shape[1]; for (size_t i = 0; i < n; i += 1) { @@ -16,8 +16,8 @@ __attribute__((weak)) void NN_elu2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, } -__attribute__((weak)) void NN_elu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float alpha) { - NN_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ELU on tensors of different shapes"); +__attribute__((weak)) void nn_elu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float alpha) { + nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ELU on tensors of different shapes"); const size_t n = y->shape[0] * y->shape[1]; for (size_t i = 0; i < n; i += 1) { diff --git a/src/cpu/equals.c b/src/cpu/equals.c index d7612bd..c8ca448 100644 --- a/src/cpu/equals.c +++ b/src/cpu/equals.c @@ -1,16 +1,16 @@ #include "nn.h" -__attribute__((weak)) uint8_t NN_equals0d_f16(const Tensor0D_F16 *a, const Tensor0D_F16 *b, float rel_err) { +__attribute__((weak)) uint8_t nn_equals0d_f16(const Tensor0D_F16 *a, const Tensor0D_F16 *b, float rel_err) { return float_equal(as_f32(a->data), as_f32(b->data), rel_err); } -__attribute__((weak)) uint8_t NN_equals0d_f32(const Tensor0D_F32 *a, const Tensor0D_F32 *b, float rel_err) { +__attribute__((weak)) uint8_t nn_equals0d_f32(const Tensor0D_F32 *a, const Tensor0D_F32 *b, float rel_err) { return float_equal(a->data, b->data, rel_err); } -__attribute__((weak)) uint8_t NN_equals1d_f16(const Tensor1D_F16 *a, const Tensor1D_F16 *b, float rel_err) { - NN_assert(a->shape[0] == b->shape[0], "Cannot compare tensors of different shapes"); +__attribute__((weak)) uint8_t 
nn_equals1d_f16(const Tensor1D_F16 *a, const Tensor1D_F16 *b, float rel_err) { + nn_assert(a->shape[0] == b->shape[0], "Cannot compare tensors of different shapes"); size_t n = a->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -21,8 +21,8 @@ __attribute__((weak)) uint8_t NN_equals1d_f16(const Tensor1D_F16 *a, const Tenso return 1; } -__attribute__((weak)) uint8_t NN_equals1d_f32(const Tensor1D_F32 *a, const Tensor1D_F32 *b, float rel_err) { - NN_assert(a->shape[0] == b->shape[0], "Cannot compare tensors of different shapes"); +__attribute__((weak)) uint8_t nn_equals1d_f32(const Tensor1D_F32 *a, const Tensor1D_F32 *b, float rel_err) { + nn_assert(a->shape[0] == b->shape[0], "Cannot compare tensors of different shapes"); size_t n = a->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -33,8 +33,8 @@ __attribute__((weak)) uint8_t NN_equals1d_f32(const Tensor1D_F32 *a, const Tenso return 1; } -__attribute__((weak)) uint8_t NN_equals2d_f16(const Tensor2D_F16 *a, const Tensor2D_F16 *b, float rel_err) { - NN_assert(a->shape[0] == b->shape[0] && a->shape[1] == b->shape[1], "Cannot compare tensors of different shapes"); +__attribute__((weak)) uint8_t nn_equals2d_f16(const Tensor2D_F16 *a, const Tensor2D_F16 *b, float rel_err) { + nn_assert(a->shape[0] == b->shape[0] && a->shape[1] == b->shape[1], "Cannot compare tensors of different shapes"); size_t n = a->shape[0] * a->shape[1]; for (size_t i = 0; i < n; i += 1) { @@ -45,8 +45,8 @@ __attribute__((weak)) uint8_t NN_equals2d_f16(const Tensor2D_F16 *a, const Tenso return 1; } -__attribute__((weak)) uint8_t NN_equals2d_f32(const Tensor2D_F32 *a, const Tensor2D_F32 *b, float rel_err) { - NN_assert(a->shape[0] == b->shape[0] && a->shape[1] == b->shape[1], "Cannot compare tensors of different shapes"); +__attribute__((weak)) uint8_t nn_equals2d_f32(const Tensor2D_F32 *a, const Tensor2D_F32 *b, float rel_err) { + nn_assert(a->shape[0] == b->shape[0] && a->shape[1] == b->shape[1], "Cannot compare tensors of different shapes"); size_t n = a->shape[0] * a->shape[1]; for (size_t i = 0; i < n; i += 1) { diff --git a/src/cpu/max.c b/src/cpu/max.c index 6701ee4..7b8f89c 100644 --- a/src/cpu/max.c +++ b/src/cpu/max.c @@ -1,7 +1,7 @@ #include "nn.h" -__attribute__((weak)) void NN_max1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x) { +__attribute__((weak)) void nn_max1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x) { y->data = -FLT_MAX; size_t n = x->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -11,7 +11,7 @@ __attribute__((weak)) void NN_max1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x) return y->data; } -__attribute__((weak)) void NN_max1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) { +__attribute__((weak)) void nn_max1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) { y->data = -FLT_MAX; size_t n = x->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -21,7 +21,7 @@ __attribute__((weak)) void NN_max1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) return y->data; } -__attribute__((weak)) void NN_max2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x) { +__attribute__((weak)) void nn_max2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x) { y->data = -FLT_MAX; size_t n = x->shape[0] * x->shape[1]; for (size_t i = 0; i < n; i += 1) { @@ -31,7 +31,7 @@ __attribute__((weak)) void NN_max2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x) return y->data; } -__attribute__((weak)) void NN_max2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x) { +__attribute__((weak)) void nn_max2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x) { y->data = -FLT_MAX; size_t n = x->shape[0] * x->shape[1]; for (size_t i = 
0; i < n; i += 1) { diff --git a/src/cpu/min.c b/src/cpu/min.c index b2ebead..5d0a7e6 100644 --- a/src/cpu/min.c +++ b/src/cpu/min.c @@ -1,7 +1,7 @@ #include "nn.h" -__attribute__((weak)) void NN_min1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x) { +__attribute__((weak)) void nn_min1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x) { y->data = FLT_MAX; size_t n = x->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -11,7 +11,7 @@ __attribute__((weak)) void NN_min1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x) return y->data; } -__attribute__((weak)) void NN_min1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) { +__attribute__((weak)) void nn_min1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) { y->data = FLT_MAX; size_t n = x->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -21,7 +21,7 @@ __attribute__((weak)) void NN_min1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) return y->data; } -__attribute__((weak)) void NN_min2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x) { +__attribute__((weak)) void nn_min2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x) { y->data = FLT_MAX; size_t n = x->shape[0] * x->shape[1]; for (size_t i = 0; i < n; i += 1) { @@ -31,7 +31,7 @@ __attribute__((weak)) void NN_min2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x) return y->data; } -__attribute__((weak)) void NN_min2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x) { +__attribute__((weak)) void nn_min2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x) { y->data = FLT_MAX; size_t n = x->shape[0] * x->shape[1]; for (size_t i = 0; i < n; i += 1) { diff --git a/src/cpu/mm.c b/src/cpu/mm.c index 9408112..7faedd9 100644 --- a/src/cpu/mm.c +++ b/src/cpu/mm.c @@ -1,9 +1,9 @@ #include "nn.h" -__attribute__((weak)) void NN_mm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2) { - NN_assert(x1->shape[1] == x2->shape[1], "Cannot perform MatMul on tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x2->shape[0], "Cannot perform MatMul on tensors of different shapes"); +__attribute__((weak)) void nn_mm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2) { + nn_assert(x1->shape[1] == x2->shape[1], "Cannot perform MatMul on tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x2->shape[0], "Cannot perform MatMul on tensors of different shapes"); const size_t batch_size = x1->shape[0]; const size_t in_features = x1->shape[1]; @@ -20,9 +20,9 @@ __attribute__((weak)) void NN_mm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, co } } -__attribute__((weak)) void NN_mm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) { - NN_assert(x1->shape[1] == x2->shape[1], "Cannot perform MatMul on tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x2->shape[0], "Cannot perform MatMul on tensors of different shapes"); +__attribute__((weak)) void nn_mm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) { + nn_assert(x1->shape[1] == x2->shape[1], "Cannot perform MatMul on tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x2->shape[0], "Cannot perform MatMul on tensors of different shapes"); const size_t batch_size = x1->shape[0]; const size_t in_features = x1->shape[1]; diff --git a/src/cpu/mul.c b/src/cpu/mul.c index 94bdcb2..4575e24 100644 --- a/src/cpu/mul.c +++ b/src/cpu/mul.c @@ -1,9 +1,9 @@ #include "nn.h" -__attribute__((weak)) void NN_mul1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, const Tensor1D_F16 *x2) { - NN_assert(x1->shape[0] == x2->shape[0], 
"Cannot add tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_mul1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, const Tensor1D_F16 *x2) { + nn_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes"); size_t n = y->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -11,9 +11,9 @@ __attribute__((weak)) void NN_mul1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, } } -__attribute__((weak)) void NN_mul1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2) { - NN_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_mul1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2) { + nn_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes"); size_t n = y->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -21,9 +21,9 @@ __attribute__((weak)) void NN_mul1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, } } -__attribute__((weak)) void NN_mul2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2) { - NN_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_mul2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2) { + nn_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes"); size_t n = y->shape[0] * y->shape[1]; for (size_t i = 0; i < n; i += 1) { @@ -31,9 +31,9 @@ __attribute__((weak)) void NN_mul2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, } } -__attribute__((weak)) void NN_mul2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) { - NN_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_mul2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) { + nn_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes"); size_t n = y->shape[0] * y->shape[1]; for (size_t i = 0; i < n; i += 1) { diff --git a/src/cpu/mulscalar.c b/src/cpu/mulscalar.c index f6e14ae..24bb485 100644 --- a/src/cpu/mulscalar.c +++ b/src/cpu/mulscalar.c @@ -1,8 +1,8 @@ #include "nn.h" -__attribute__((weak)) void NN_mulscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x, float16_t scalar) { - NN_assert(y->shape[0] == x->shape[0], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_mulscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x, float16_t scalar) { + nn_assert(y->shape[0] == x->shape[0], "Cannot add tensors of different shapes"); size_t n = y->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -10,8 +10,8 @@ __attribute__((weak)) void 
NN_mulscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F1 } } -__attribute__((weak)) void NN_mulscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) { - NN_assert(y->shape[0] == x->shape[0], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_mulscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) { + nn_assert(y->shape[0] == x->shape[0], "Cannot add tensors of different shapes"); size_t n = y->shape[0]; for (size_t i = 0; i < n; i += 1) { @@ -19,8 +19,8 @@ __attribute__((weak)) void NN_mulscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F3 } } -__attribute__((weak)) void NN_mulscalar2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float16_t scalar) { - NN_assert(y->shape[0] == x->shape[0] && y->shape[1] == x->shape[1], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_mulscalar2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float16_t scalar) { + nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == x->shape[1], "Cannot add tensors of different shapes"); size_t n = y->shape[0] * y->shape[1]; for (size_t i = 0; i < n; i += 1) { @@ -28,8 +28,8 @@ __attribute__((weak)) void NN_mulscalar2d_f16(Tensor2D_F16 *y, const Tensor2D_F1 } } -__attribute__((weak)) void NN_mulscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar) { - NN_assert(y->shape[0] == x->shape[0] && y->shape[1] == x->shape[1], "Cannot add tensors of different shapes"); +__attribute__((weak)) void nn_mulscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar) { + nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == x->shape[1], "Cannot add tensors of different shapes"); size_t n = y->shape[0] * y->shape[1]; for (size_t i = 0; i < n; i += 1) { diff --git a/src/cpu/relu.c b/src/cpu/relu.c index c60025f..e6a3e6d 100644 --- a/src/cpu/relu.c +++ b/src/cpu/relu.c @@ -1,8 +1,8 @@ #include "nn.h" -__attribute__((weak)) void NN_relu2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x) { - NN_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ReLU on tensors of different shapes"); +__attribute__((weak)) void nn_relu2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x) { + nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ReLU on tensors of different shapes"); const size_t n = y->shape[0] * y->shape[1]; for (size_t i = 0; i < n; i += 1) { @@ -11,8 +11,8 @@ __attribute__((weak)) void NN_relu2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x) } } -__attribute__((weak)) void NN_relu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x) { - NN_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ReLU on tensors of different shapes"); +__attribute__((weak)) void nn_relu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x) { + nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ReLU on tensors of different shapes"); const size_t n = y->shape[0] * y->shape[1]; for (size_t i = 0; i < n; i += 1) { diff --git a/src/print.c b/src/print.c index 2ba887f..92587bf 100644 --- a/src/print.c +++ b/src/print.c @@ -1,34 +1,34 @@ #include "nn.h" -void NN_print_u8(uint8_t v) { +void nn_print_u8(uint8_t v) { printf("%d", v); } -void NN_print_i8(int8_t v) { +void nn_print_i8(int8_t v) { printf("%d", v); } -void NN_print_u16(uint16_t v) { +void nn_print_u16(uint16_t v) { printf("%d", v); } -void NN_print_i16(int16_t v) { +void nn_print_i16(int16_t v) { printf("%d", v); } -void NN_print_u32(uint32_t v) { +void nn_print_u32(uint32_t v) { printf("%ld", (size_t)v); } -void NN_print_i32(int32_t v) 
{ +void nn_print_i32(int32_t v) { printf("%ld", (size_t)v); } -void NN_print_f16(float16_t v, int16_t num_digits) { - NN_print_f32(as_f32(v), num_digits); +void nn_print_f16(float16_t v, int16_t num_digits) { + nn_print_f32(as_f32(v), num_digits); } -void NN_print_f32(float v, int16_t num_digits) { +void nn_print_f32(float v, int16_t num_digits) { if (isinf(v)) { if (signbit(v)) { printf("-inf"); @@ -65,7 +65,7 @@ void NN_print_f32(float v, int16_t num_digits) { } -void NN_print_shape(size_t ndim, const size_t *shape) { +void nn_print_shape(size_t ndim, const size_t *shape) { printf("("); for (size_t i = 0; i < ndim; i += 1) { printf("%d", (int)shape[i]); @@ -77,10 +77,10 @@ void NN_print_shape(size_t ndim, const size_t *shape) { } -void NN_print_tensor1d_f16(const Tensor1D_F16 *tensor) { +void nn_print_tensor1d_f16(const Tensor1D_F16 *tensor) { printf("["); for (size_t i=0; i<tensor->shape[0]; i+=1) { - NN_print_f16(*((float16_t *)tensor->data + i), 3); + nn_print_f16(*((float16_t *)tensor->data + i), 3); if (i < tensor->shape[0]-1) { printf(" "); } @@ -88,10 +88,10 @@ void NN_print_tensor1d_f16(const Tensor1D_F16 *tensor) { printf("]\n"); } -void NN_print_tensor1d_f32(const Tensor1D_F32 *tensor) { +void nn_print_tensor1d_f32(const Tensor1D_F32 *tensor) { printf("["); for (size_t i=0; i<tensor->shape[0]; i+=1) { - NN_print_f32(*((float *)tensor->data + i), 3); + nn_print_f32(*((float *)tensor->data + i), 3); if (i < tensor->shape[0]-1) { printf(" "); } @@ -99,7 +99,7 @@ void NN_print_tensor1d_f32(const Tensor1D_F32 *tensor) { printf("]\n"); } -void NN_print_tensor2d_f16(const Tensor2D_F16 *tensor) { +void nn_print_tensor2d_f16(const Tensor2D_F16 *tensor) { printf("["); for (size_t i=0; i<tensor->shape[0]; i+=1) { if (i != 0) { @@ -107,7 +107,7 @@ void NN_print_tensor2d_f16(const Tensor2D_F16 *tensor) { } printf("["); for (size_t j=0; j<tensor->shape[1]; j+=1) { - NN_print_f16(*((float16_t *)tensor->data + i*tensor->shape[1] + j), 3); + nn_print_f16(*((float16_t *)tensor->data + i*tensor->shape[1] + j), 3); if (j < tensor->shape[1]-1) { printf(" "); } @@ -120,7 +120,7 @@ void NN_print_tensor2d_f16(const Tensor2D_F16 *tensor) { printf("]\n"); } -void NN_print_tensor2d_f32(const Tensor2D_F32 *tensor) { +void nn_print_tensor2d_f32(const Tensor2D_F32 *tensor) { printf("["); for (size_t i=0; i<tensor->shape[0]; i+=1) { if (i != 0) { @@ -128,7 +128,7 @@ void NN_print_tensor2d_f32(const Tensor2D_F32 *tensor) { } printf("["); for (size_t j=0; j<tensor->shape[1]; j+=1) { - NN_print_f32(*((float *)tensor->data + i*tensor->shape[1] + j), 3); + nn_print_f32(*((float *)tensor->data + i*tensor->shape[1] + j), 3); if (j < tensor->shape[1]-1) { printf(" "); } @@ -141,12 +141,12 @@ void NN_print_tensor2d_f32(const Tensor2D_F32 *tensor) { printf("]\n"); } -// void NN_print_tensor3d_f16(const Tensor3D_F16 *tensor); +// void nn_print_tensor3d_f16(const Tensor3D_F16 *tensor); -// void NN_print_tensor3d_f32(const Tensor3D_F32 *tensor); +// void nn_print_tensor3d_f32(const Tensor3D_F32 *tensor); -// void NN_print_tensor4d_f16(const Tensor4D_F16 *tensor); +// void nn_print_tensor4d_f16(const Tensor4D_F16 *tensor); -// void NN_print_tensor4d_f32(const Tensor4D_F32 *tensor); +// void nn_print_tensor4d_f32(const Tensor4D_F32 *tensor); diff --git a/src/rvv/add.S b/src/rvv/add.S index 5b918dd..012f3fd 100644 --- a/src/rvv/add.S +++ b/src/rvv/add.S @@ -1,6 +1,6 @@ .section .text .align 4 -.globl NN_add_f16_asm +.globl nn_add_f16_asm NN_add_f16_asm: beqz a0, __add_f16_exit __add_f16_loop: @@ -20,7 +20,7 @@ __add_f16_exit: .section .text .align 4 -.globl NN_add_f32_asm
+.globl nn_add_f32_asm NN_add_f32_asm: beqz a0, __add_f32_exit __add_f32_loop: diff --git a/src/rvv/add.c b/src/rvv/add.c index f9d336f..a9b05c9 100644 --- a/src/rvv/add.c +++ b/src/rvv/add.c @@ -4,14 +4,14 @@ #ifdef RISCV_V #ifdef RISCV_ZVFH - void NN_add_f16_asm(size_t n, float16_t *y_data, const float16_t *x1_data, const float16_t *x2_data); + void nn_add_f16_asm(size_t n, float16_t *y_data, const float16_t *x1_data, const float16_t *x2_data); #endif -void NN_add_f32_asm(size_t n, float *y_data, const float *x1_data, const float *x2_data); +void nn_add_f32_asm(size_t n, float *y_data, const float *x1_data, const float *x2_data); #ifdef RISCV_ZVFH - void NN_add1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, const Tensor1D_F16 *x2) { - NN_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes"); + void nn_add1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, const Tensor1D_F16 *x2) { + nn_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes"); size_t n = y->shape[0]; float16_t *x1_data = x1->data; @@ -19,7 +19,7 @@ void NN_add_f32_asm(size_t n, float *y_data, const float *x1_data, const float * float16_t *y_data = y->data; #ifdef RISCV_V_ASM - NN_add_f16_asm(n, y_data, x1_data, x2_data); + nn_add_f16_asm(n, y_data, x1_data, x2_data); #else while (n > 0) { size_t vl = __riscv_vsetvl_e16m1(n); @@ -36,9 +36,9 @@ void NN_add_f32_asm(size_t n, float *y_data, const float *x1_data, const float * } #endif -void NN_add1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2) { - NN_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes"); +void nn_add1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2) { + nn_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes"); size_t n = y->shape[0]; float *x1_data = x1->data; @@ -46,7 +46,7 @@ void NN_add1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x float *y_data = y->data; #ifdef RISCV_V_ASM - NN_add_f32_asm(n, y_data, x1_data, x2_data); + nn_add_f32_asm(n, y_data, x1_data, x2_data); #else while (n > 0) { size_t vl = __riscv_vsetvl_e32m1(n); @@ -63,9 +63,9 @@ void NN_add1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x } #ifdef RISCV_ZVFH - void NN_add2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2) { - NN_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes"); + void nn_add2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2) { + nn_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes"); size_t n = y->shape[0] * y->shape[1]; float16_t *x1_data = x1->data; @@ -73,7 +73,7 @@ void NN_add1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x float16_t *y_data = y->data; #ifdef RISCV_V_ASM - NN_add_f16_asm(n, y_data, x1_data, x2_data); + nn_add_f16_asm(n, y_data, x1_data, 
x2_data); #else while (n > 0) { size_t vl = __riscv_vsetvl_e16m1(n); @@ -90,9 +90,9 @@ void NN_add1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x } #endif -void NN_add2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) { - NN_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes"); - NN_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes"); +void nn_add2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) { + nn_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes"); + nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes"); size_t n = y->shape[0] * y->shape[1]; float *x1_data = x1->data; @@ -100,7 +100,7 @@ void NN_add2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x float *y_data = y->data; #ifdef RISCV_V_ASM - NN_add_f32_asm(n, y_data, x1_data, x2_data); + nn_add_f32_asm(n, y_data, x1_data, x2_data); #else while (n > 0) { size_t vl = __riscv_vsetvl_e32m1(n); diff --git a/src/rvv/addmm.S b/src/rvv/addmm.S index 3d94bbe..6d0936e 100644 --- a/src/rvv/addmm.S +++ b/src/rvv/addmm.S @@ -1,6 +1,6 @@ .section .text .align 4 -.globl NN_addmm_f16_asm +.globl nn_addmm_f16_asm NN_addmm_f16_asm: vsetvli t3, zero, e16, m1, ta, ma beqz a1, __nn_addmm_f16_exit @@ -44,7 +44,7 @@ __nn_addmm_f16_exit: .section .text .align 4 -.globl NN_addmm_f32_asm +.globl nn_addmm_f32_asm NN_addmm_f32_asm: vsetvli t3, zero, e32, m1, ta, ma beqz a1, __nn_addmm_f32_exit diff --git a/src/rvv/addmm.c b/src/rvv/addmm.c index 1264967..193d65a 100644 --- a/src/rvv/addmm.c +++ b/src/rvv/addmm.c @@ -4,15 +4,15 @@ #ifdef RISCV_V #ifdef RISCV_ZVFH - void NN_addmm_f16_asm(size_t in_features, size_t out_features, float16_t *y_data, const float16_t *x_data, const float16_t *weight_data, const float16_t *bias_data); + void nn_addmm_f16_asm(size_t in_features, size_t out_features, float16_t *y_data, const float16_t *x_data, const float16_t *weight_data, const float16_t *bias_data); #endif -void NN_addmm_f32_asm(size_t in_features, size_t out_features, float *y_data, const float *x_data, const float *weight_data, const float *bias_data); +void nn_addmm_f32_asm(size_t in_features, size_t out_features, float *y_data, const float *x_data, const float *weight_data, const float *bias_data); #ifdef RISCV_ZVFH - void NN_addmm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, const Tensor2D_F16 *weight, const Tensor1D_F16 *bias) { - NN_assert(x->shape[1] == weight->shape[1], "Cannot perform Linear on tensors of different shapes"); - NN_assert(bias->shape[0] == weight->shape[0], "Cannot perform Linear on tensors of different shapes"); - NN_assert(y->shape[0] == x->shape[0] && y->shape[1] == weight->shape[0], "Cannot perform Linear on tensors of different shapes"); + void nn_addmm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, const Tensor2D_F16 *weight, const Tensor1D_F16 *bias) { + nn_assert(x->shape[1] == weight->shape[1], "Cannot perform Linear on tensors of different shapes"); + nn_assert(bias->shape[0] == weight->shape[0], "Cannot perform Linear on tensors of different shapes"); + nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == weight->shape[0], "Cannot perform Linear on tensors of different shapes"); const size_t batch_size = x->shape[0]; const size_t in_features = x->shape[1]; @@ -28,7 +28,7 @@ void NN_addmm_f32_asm(size_t in_features, size_t 
      float16_t *y_data = y_batch_data;
 
      #ifdef RISCV_V_ASM
-       NN_addmm_f16_asm(in_features, out_features, y_data, x_data, weight_data, bias_data);
+       nn_addmm_f16_asm(in_features, out_features, y_data, x_data, weight_data, bias_data);
      #else
        size_t vlmax = __riscv_vsetvlmax_e16m1();
@@ -65,10 +65,10 @@ void NN_addmm_f32_asm(size_t in_features, size_t out_features, float *y_data, co
 }
 #endif
 
-void NN_addmm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, const Tensor2D_F32 *weight, const Tensor1D_F32 *bias) {
-  NN_assert(x->shape[1] == weight->shape[1], "Cannot perform Linear on tensors of different shapes");
-  NN_assert(bias->shape[0] == weight->shape[0], "Cannot perform Linear on tensors of different shapes");
-  NN_assert(y->shape[0] == x->shape[0] && y->shape[1] == weight->shape[0], "Cannot perform Linear on tensors of different shapes");
+void nn_addmm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, const Tensor2D_F32 *weight, const Tensor1D_F32 *bias) {
+  nn_assert(x->shape[1] == weight->shape[1], "Cannot perform Linear on tensors of different shapes");
+  nn_assert(bias->shape[0] == weight->shape[0], "Cannot perform Linear on tensors of different shapes");
+  nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == weight->shape[0], "Cannot perform Linear on tensors of different shapes");
 
   const size_t batch_size = x->shape[0];
   const size_t in_features = x->shape[1];
@@ -84,7 +84,7 @@ void NN_addmm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, const Tensor2D_F32 *we
     float *y_data = y_batch_data;
 
     #ifdef RISCV_V_ASM
-      NN_addmm_f32_asm(in_features, out_features, y_data, x_data, weight_data, bias_data);
+      nn_addmm_f32_asm(in_features, out_features, y_data, x_data, weight_data, bias_data);
     #else
      size_t vlmax = __riscv_vsetvlmax_e32m1();
diff --git a/src/rvv/addscalar.c b/src/rvv/addscalar.c
index 5b0bb80..6b8e72e 100644
--- a/src/rvv/addscalar.c
+++ b/src/rvv/addscalar.c
@@ -4,8 +4,8 @@
 #ifdef RISCV_V
 
 #ifdef RISCV_ZVFH
-  void NN_addscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x, float16_t scalar) {
-    NN_assert(x->shape[0] == y->shape[0], "Cannot add tensors of different shapes");
+  void nn_addscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x, float16_t scalar) {
+    nn_assert(x->shape[0] == y->shape[0], "Cannot add tensors of different shapes");
 
     size_t n = y->shape[0];
     float16_t *x_data = x->data;
@@ -23,8 +23,8 @@
 }
 #endif
 
-void NN_addscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) {
-  NN_assert(x->shape[0] == y->shape[0], "Cannot add tensors of different shapes");
+void nn_addscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) {
+  nn_assert(x->shape[0] == y->shape[0], "Cannot add tensors of different shapes");
 
   size_t n = y->shape[0];
   float *x_data = x->data;
@@ -42,8 +42,8 @@ void NN_addscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) {
 }
 
 #ifdef RISCV_ZVFH
-  void NN_addscalar2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float16_t scalar) {
-    NN_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot add tensors of different shapes");
+  void nn_addscalar2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float16_t scalar) {
+    nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot add tensors of different shapes");
 
     size_t n = y->shape[0] * y->shape[1];
     float16_t *x_data = x->data;
@@ -61,8 +61,8 @@ void NN_addscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) {
 }
 #endif
 
-void NN_addscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar) {
-  NN_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot add tensors of different shapes");
+void nn_addscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar) {
+  nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot add tensors of different shapes");
 
   size_t n = y->shape[0] * y->shape[1];
   float *x_data = x->data;
diff --git a/src/rvv/elu.c b/src/rvv/elu.c
index 974ef61..ed4adad 100644
--- a/src/rvv/elu.c
+++ b/src/rvv/elu.c
@@ -3,8 +3,8 @@
 
 #ifdef RISCV_V
 
-// void NN_elu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float alpha) {
-//   NN_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ELU on tensors of different shapes");
+// void nn_elu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float alpha) {
+//   nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ELU on tensors of different shapes");
 
 //   size_t n = x->shape[0] * x->shape[1];
 //   float *x_data = x->data;
diff --git a/src/rvv/max.c b/src/rvv/max.c
index 249601b..5150931 100644
--- a/src/rvv/max.c
+++ b/src/rvv/max.c
@@ -2,7 +2,7 @@
 #include "nn.h"
 
 #ifdef RISCV_ZVFH
-  void NN_max1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x) {
+  void nn_max1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x) {
     vfloat16m1_t vec_max = __riscv_vfmv_v_f_f16m1(-FLT_MAX, 1);
     size_t n = x->shape[0];
     float16_t *x_data = x->data;
@@ -18,7 +18,7 @@
 }
 #endif
 
-void NN_max1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) {
+void nn_max1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) {
   vfloat32m1_t vec_max = __riscv_vfmv_s_f_f32m1(-FLT_MAX, 1);
   size_t n = x->shape[0];
   float *x_data = x->data;
@@ -34,7 +34,7 @@ void NN_max1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) {
 }
 
 #ifdef RISCV_ZVFH
-  void NN_max2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x) {
+  void nn_max2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x) {
    vfloat16m1_t vec_max = __riscv_vfmv_v_f_f16m1(-FLT_MAX, 1);
    size_t n = x->shape[0] * x->shape[1];
    float16_t *x_data = x->data;
@@ -50,7 +50,7 @@ void NN_max1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) {
 }
 #endif
 
-void NN_max2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x) {
+void nn_max2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x) {
   vfloat32m1_t vec_max = __riscv_vfmv_s_f_f32m1(-FLT_MAX, 1);
   size_t n = x->shape[0] * x->shape[1];
   float *x_data = x->data;
diff --git a/src/rvv/min.c b/src/rvv/min.c
index efc26fb..49070c6 100644
--- a/src/rvv/min.c
+++ b/src/rvv/min.c
@@ -2,7 +2,7 @@
 #include "nn.h"
 
 #ifdef RISCV_ZVFH
-  void NN_min1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x) {
+  void nn_min1d_f16(Tensor0D_F16 *y, const Tensor1D_F16 *x) {
    vfloat16m1_t vec_min = __riscv_vfmv_v_f_f16m1(FLT_MAX, 1);
    size_t n = x->shape[0];
    float16_t *x_data = x->data;
@@ -18,7 +18,7 @@
 }
 #endif
 
-void NN_min1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) {
+void nn_min1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) {
   vfloat32m1_t vec_min = __riscv_vfmv_s_f_f32m1(FLT_MAX, 1);
   size_t n = x->shape[0];
   float *x_data = x->data;
@@ -34,7 +34,7 @@ void NN_min1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) {
 }
 
 #ifdef RISCV_ZVFH
-  void NN_min2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x) {
+  void nn_min2d_f16(Tensor0D_F16 *y, const Tensor2D_F16 *x) {
    vfloat16m1_t vec_min = __riscv_vfmv_v_f_f16m1(FLT_MAX, 1);
    size_t n = x->shape[0] * x->shape[1];
    float16_t *x_data = x->data;
@@ -50,7 +50,7 @@ void NN_min1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) {
 }
 #endif
 
-void NN_min2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x) {
+void nn_min2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x) {
   vfloat32m1_t vec_min = __riscv_vfmv_s_f_f32m1(FLT_MAX, 1);
  size_t n = x->shape[0] * x->shape[1];
  float *x_data = x->data;
diff --git a/src/rvv/mm.S b/src/rvv/mm.S
index f56a403..d6c4740 100644
--- a/src/rvv/mm.S
+++ b/src/rvv/mm.S
@@ -1,6 +1,6 @@
 .section .text
 .align 4
-.globl NN_mm_f16_asm
-NN_mm_f16_asm:
+.globl nn_mm_f16_asm
+nn_mm_f16_asm:
   vsetvli a7, zero, e16, m1, ta, ma
   beqz a1, __nn_mm_f16_exit
@@ -41,7 +41,7 @@ __nn_mm_f16_exit:
 
 .section .text
 .align 4
-.globl NN_mm_f32_asm
-NN_mm_f32_asm:
+.globl nn_mm_f32_asm
+nn_mm_f32_asm:
   vsetvli a7, zero, e32, m1, ta, ma
   beqz a1, __nn_mm_f32_exit
diff --git a/src/rvv/mm.c b/src/rvv/mm.c
index 05762a5..1a67b7d 100644
--- a/src/rvv/mm.c
+++ b/src/rvv/mm.c
@@ -4,14 +4,14 @@
 #ifdef RISCV_V
 
 #ifdef RISCV_ZVFH
-  void NN_mm_f16_asm(size_t in_features, size_t out_features, float16_t *y_data, const float16_t *x1_data, const float16_t *x2_data);
+  void nn_mm_f16_asm(size_t in_features, size_t out_features, float16_t *y_data, const float16_t *x1_data, const float16_t *x2_data);
 #endif
-void NN_mm_f32_asm(size_t in_features, size_t out_features, float *y_data, const float *x1_data, const float *x2_data);
+void nn_mm_f32_asm(size_t in_features, size_t out_features, float *y_data, const float *x1_data, const float *x2_data);
 
 #ifdef RISCV_ZVFH
-  void NN_mm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2) {
-    NN_assert(x1->shape[1] == x2->shape[1], "Cannot perform MatMul on tensors of different shapes");
-    NN_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x2->shape[0], "Cannot perform MatMul on tensors of different shapes");
+  void nn_mm_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2) {
+    nn_assert(x1->shape[1] == x2->shape[1], "Cannot perform MatMul on tensors of different shapes");
+    nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x2->shape[0], "Cannot perform MatMul on tensors of different shapes");
 
     const size_t batch_size = x1->shape[0];
     const size_t in_features = x1->shape[1];
@@ -27,7 +27,7 @@ void NN_mm_f32_asm(size_t in_features, size_t out_features, float *y_data, const
      float16_t *y_data = y_batch_data;
 
      #ifdef RISCV_V_ASM
-       NN_mm_f16_asm(in_features, out_features, y_data, x1_data, x2_data);
+       nn_mm_f16_asm(in_features, out_features, y_data, x1_data, x2_data);
      #else
        size_t vlmax = __riscv_vsetvlmax_e16m1();
@@ -64,9 +64,9 @@ void NN_mm_f32_asm(size_t in_features, size_t out_features, float *y_data, const
 }
 #endif
 
-void NN_mm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) {
-  NN_assert(x1->shape[1] == x2->shape[1], "Cannot perform MatMul on tensors of different shapes");
-  NN_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x2->shape[0], "Cannot perform MatMul on tensors of different shapes");
+void nn_mm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) {
+  nn_assert(x1->shape[1] == x2->shape[1], "Cannot perform MatMul on tensors of different shapes");
+  nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x2->shape[0], "Cannot perform MatMul on tensors of different shapes");
 
   const size_t batch_size = x1->shape[0];
   const size_t in_features = x1->shape[1];
@@ -82,7 +82,7 @@ void NN_mm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2)
    float *y_data = y_batch_data;
 
    #ifdef RISCV_V_ASM
-     NN_mm_f32_asm(in_features, out_features, y_data, x1_data, x2_data);
+     nn_mm_f32_asm(in_features, out_features, y_data, x1_data, x2_data);
    #else
      size_t vlmax = __riscv_vsetvlmax_e32m1();
diff --git a/src/rvv/mul.c b/src/rvv/mul.c
index 967fd59..a425ac0 100644
--- a/src/rvv/mul.c
+++ b/src/rvv/mul.c
@@ -4,14 +4,14 @@
 #ifdef RISCV_V
 
 #ifdef RISCV_ZVFH
-  void NN_mul_f16_asm(size_t n, float16_t *y_data, const float16_t *x1_data, const float16_t *x2_data);
+  void nn_mul_f16_asm(size_t n, float16_t *y_data, const float16_t *x1_data, const float16_t *x2_data);
 #endif
-void NN_mul_f32_asm(size_t n, float *y_data, const float *x1_data, const float *x2_data);
+void nn_mul_f32_asm(size_t n, float *y_data, const float *x1_data, const float *x2_data);
 
 #ifdef RISCV_ZVFH
-  void NN_mul1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, const Tensor1D_F16 *x2) {
-    NN_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes");
-    NN_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes");
+  void nn_mul1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x1, const Tensor1D_F16 *x2) {
+    nn_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes");
+    nn_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes");
 
     size_t n = y->shape[0];
     float16_t *x1_data = x1->data;
@@ -19,7 +19,7 @@ void NN_mul_f32_asm(size_t n, float *y_data, const float *x1_data, const float *
     float16_t *y_data = y->data;
 
     #ifdef RISCV_V_ASM
-      NN_mul_f16_asm(n, y_data, x1_data, x2_data);
+      nn_mul_f16_asm(n, y_data, x1_data, x2_data);
     #else
       while (n > 0) {
        size_t vl = __riscv_vsetvl_e16m1(n);
@@ -36,9 +36,9 @@ void NN_mul_f32_asm(size_t n, float *y_data, const float *x1_data, const float *
 }
 #endif
 
-void NN_mul1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2) {
-  NN_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes");
-  NN_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes");
+void nn_mul1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2) {
+  nn_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes");
+  nn_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes");
 
   size_t n = y->shape[0];
   float *x1_data = x1->data;
@@ -46,7 +46,7 @@ void NN_mul1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x
   float *y_data = y->data;
 
   #ifdef RISCV_V_ASM
-    NN_mul_f32_asm(n, y_data, x1_data, x2_data);
+    nn_mul_f32_asm(n, y_data, x1_data, x2_data);
   #else
     while (n > 0) {
      size_t vl = __riscv_vsetvl_e32m1(n);
@@ -63,9 +63,9 @@ void NN_mul1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x
 }
 
 #ifdef RISCV_ZVFH
-  void NN_mul2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2) {
-    NN_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes");
-    NN_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes");
+  void nn_mul2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x1, const Tensor2D_F16 *x2) {
+    nn_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes");
+    nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes");
 
     size_t n = y->shape[0] * y->shape[1];
     float16_t *x1_data = x1->data;
@@ -73,7 +73,7 @@ void NN_mul1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x
     float16_t *y_data = y->data;
 
     #ifdef RISCV_V_ASM
-      NN_mul_f16_asm(n, y_data, x1_data, x2_data);
+      nn_mul_f16_asm(n, y_data, x1_data, x2_data);
     #else
       while (n > 0) {
        size_t vl = __riscv_vsetvl_e16m1(n);
@@ -90,9 +90,9 @@ void NN_mul1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x
 }
 #endif
 
-void NN_mul2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) {
-  NN_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes");
-  NN_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes");
+void nn_mul2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) {
+  nn_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes");
+  nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes");
 
   size_t n = y->shape[0] * y->shape[1];
   float *x1_data = x1->data;
@@ -100,7 +100,7 @@ void NN_mul2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x
   float *y_data = y->data;
 
   #ifdef RISCV_V_ASM
-    NN_mul_f32_asm(n, y_data, x1_data, x2_data);
+    nn_mul_f32_asm(n, y_data, x1_data, x2_data);
   #else
     while (n > 0) {
      size_t vl = __riscv_vsetvl_e32m1(n);
diff --git a/src/rvv/mulscalar.c b/src/rvv/mulscalar.c
index 7144b21..c0c6a87 100644
--- a/src/rvv/mulscalar.c
+++ b/src/rvv/mulscalar.c
@@ -4,8 +4,8 @@
 #ifdef RISCV_V
 
 #ifdef RISCV_ZVFH
-  void NN_mulscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x, float16_t scalar) {
-    NN_assert(x->shape[0] == y->shape[0], "Cannot add tensors of different shapes");
+  void nn_mulscalar1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x, float16_t scalar) {
+    nn_assert(x->shape[0] == y->shape[0], "Cannot add tensors of different shapes");
 
     size_t n = y->shape[0];
     float16_t *x_data = x->data;
@@ -23,8 +23,8 @@
 }
 #endif
 
-void NN_mulscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) {
-  NN_assert(x->shape[0] == y->shape[0], "Cannot add tensors of different shapes");
+void nn_mulscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) {
+  nn_assert(x->shape[0] == y->shape[0], "Cannot add tensors of different shapes");
 
   size_t n = y->shape[0];
   float *x_data = x->data;
@@ -42,8 +42,8 @@ void NN_mulscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) {
 }
 
 #ifdef RISCV_ZVFH
-  void NN_mulscalar2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float16_t scalar) {
-    NN_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot add tensors of different shapes");
+  void nn_mulscalar2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x, float16_t scalar) {
+    nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot add tensors of different shapes");
 
     size_t n = y->shape[0] * y->shape[1];
     float16_t *x_data = x->data;
@@ -61,8 +61,8 @@ void NN_mulscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) {
 }
 #endif
 
-void NN_mulscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar) {
-  NN_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot add tensors of different shapes");
+void nn_mulscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar) {
+  nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot add tensors of different shapes");
 
   size_t n = y->shape[0] * y->shape[1];
   float *x_data = x->data;
diff --git a/src/rvv/relu.S b/src/rvv/relu.S
index a6e4677..7962225 100644
--- a/src/rvv/relu.S
+++ b/src/rvv/relu.S
@@ -2,8 +2,8 @@
 #ifdef RISCV_ZVFH
   .section .text
   .align 4
-  .globl NN_relu_f16_asm
-  NN_relu_f16_asm:
+  .globl nn_relu_f16_asm
+  nn_relu_f16_asm:
     beqz a0, __relu_f16_exit
     fmv.h.x fa5, zero
 __relu_f16_loop:
@@ -22,7 +22,7 @@
 
 .section .text
 .align 4
-.globl NN_relu_f32_asm
-NN_relu_f32_asm:
+.globl nn_relu_f32_asm
+nn_relu_f32_asm:
   beqz a0, __relu_f32_exit
   fmv.w.x fa5, zero
diff --git a/src/rvv/relu.c b/src/rvv/relu.c
index 26e2de0..ee459d2 100644
--- a/src/rvv/relu.c
+++ b/src/rvv/relu.c
@@ -4,20 +4,20 @@
 #ifdef RISCV_V
 
 #ifdef RISCV_ZVFH
-  void NN_relu_f16_asm(size_t n, float16_t *y_data, const float16_t *x_data);
+  void nn_relu_f16_asm(size_t n, float16_t *y_data, const float16_t *x_data);
 #endif
-void NN_relu_f32_asm(size_t n, float *y_data, const float *x_data);
+void nn_relu_f32_asm(size_t n, float *y_data, const float *x_data);
 
 #ifdef RISCV_ZVFH
-  void NN_relu2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x) {
-    NN_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ReLU on tensors of different shapes");
+  void nn_relu2d_f16(Tensor2D_F16 *y, const Tensor2D_F16 *x) {
+    nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ReLU on tensors of different shapes");
 
     size_t n = y->shape[0] * y->shape[1];
     float16_t *x_data = x->data;
     float16_t *y_data = y->data;
 
     #ifdef RISCV_V_ASM
-      NN_relu_f16_asm(n, y_data, x_data);
+      nn_relu_f16_asm(n, y_data, x_data);
     #else
       float16_t zero = 0.0f;
@@ -34,15 +34,15 @@ void NN_relu_f32_asm(size_t n, float *y_data, const float *x_data);
 }
 #endif
 
-void NN_relu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x) {
-  NN_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ReLU on tensors of different shapes");
+void nn_relu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x) {
+  nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ReLU on tensors of different shapes");
 
   size_t n = y->shape[0] * y->shape[1];
   float *x_data = x->data;
   float *y_data = y->data;
 
   #ifdef RISCV_V_ASM
-    NN_relu_f32_asm(n, y_data, x_data);
+    nn_relu_f32_asm(n, y_data, x_data);
   #else
     float zero = 0.0f;
diff --git a/tests/src/generate_test.py b/tests/src/generate_test.py
index bcdb8e7..b0dd311 100644
--- a/tests/src/generate_test.py
+++ b/tests/src/generate_test.py
@@ -50,10 +50,10 @@ class TestGenerator:
     cycles = read_cycles();
     {{ func_str }}
     cycles = read_cycles() - cycles;
-    printf("%s (%lu cycles)\\n", NN_equals{{ dim }}d_{{ dtype.lower() }}(&golden, &actual, {{ precision }}) ? "PASS" : "FAIL", cycles);
+    printf("%s (%lu cycles)\\n", nn_equals{{ dim }}d_{{ dtype.lower() }}(&golden, &actual, {{ precision }}) ? "PASS" : "FAIL", cycles);
 
 {% for tensor_str in tensor_destructors %}{{ tensor_str }}{% endfor %}
-    // NN_free_tensor_data(actual);
+    // nn_free_tensor_data(actual);
   }"""
 
     @staticmethod
diff --git a/tests/src/generated.c b/tests/src/generated.c
index d79ec8c..0b45b62 100644
--- a/tests/src/generated.c
+++ b/tests/src/generated.c
@@ -40,12 +40,12 @@ int main() {
   };
 
   cycles = read_cycles();
-  NN_add1d_f16(&actual, &a, &b);
+  nn_add1d_f16(&actual, &a, &b);
   cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals1d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals1d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -75,12 +75,12 @@ int main() {
   };
 
   cycles = read_cycles();
-  NN_add2d_f16(&actual, &a, &b);
+  nn_add2d_f16(&actual, &a, &b);
   cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -105,12 +105,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_addscalar1d_f16(&actual, &a, as_f16(0.5));
+  nn_addscalar1d_f16(&actual, &a, as_f16(0.5));
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals1d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals1d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -135,12 +135,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_addscalar2d_f16(&actual, &a, as_f16(0.5));
+  nn_addscalar2d_f16(&actual, &a, as_f16(0.5));
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -170,12 +170,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_mul1d_f16(&actual, &a, &b);
+  nn_mul1d_f16(&actual, &a, &b);
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals1d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals1d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -205,12 +205,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_mul2d_f16(&actual, &a, &b);
+  nn_mul2d_f16(&actual, &a, &b);
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -235,12 +235,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_mulscalar1d_f16(&actual, &a, as_f16(0.5));
+  nn_mulscalar1d_f16(&actual, &a, as_f16(0.5));
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals1d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals1d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -265,12 +265,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_mulscalar2d_f16(&actual, &a, as_f16(0.5));
+  nn_mulscalar2d_f16(&actual, &a, as_f16(0.5));
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -300,12 +300,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_mm_f16(&actual, &x, &w);
+  nn_mm_f16(&actual, &x, &w);
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -330,12 +330,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_max1d_f16(&actual, &x);
+  nn_max1d_f16(&actual, &x);
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals0d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals0d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -360,12 +360,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_max2d_f16(&actual, &x);
+  nn_max2d_f16(&actual, &x);
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals0d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals0d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -390,12 +390,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_min1d_f16(&actual, &x);
+  nn_min1d_f16(&actual, &x);
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals0d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals0d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -420,12 +420,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_min2d_f16(&actual, &x);
+  nn_min2d_f16(&actual, &x);
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals0d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals0d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -460,12 +460,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_addmm_f16(&actual, &x, &w, &b);
+  nn_addmm_f16(&actual, &x, &w, &b);
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -490,12 +490,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_relu2d_f16(&actual, &x);
+  nn_relu2d_f16(&actual, &x);
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -520,12 +520,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_elu2d_f16(&actual, &x, 1.0);
+  nn_elu2d_f16(&actual, &x, 1.0);
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals2d_f16(&golden, &actual, 1e-2) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -555,12 +555,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_add1d_f32(&actual, &a, &b);
+  nn_add1d_f32(&actual, &a, &b);
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals1d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals1d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles);
 
-  // NN_free_tensor_data(actual);
+  // nn_free_tensor_data(actual);
  }
 
  {
@@ -590,12 +590,12 @@ int main() {
   };
 
  cycles = read_cycles();
-  NN_add2d_f32(&actual, &a, &b);
+  nn_add2d_f32(&actual, &a, &b);
  cycles = read_cycles() - cycles;
-  printf("%s (%lu cycles)\n", NN_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles);
+  printf("%s (%lu cycles)\n", nn_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles);
"PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -620,12 +620,12 @@ int main() { }; cycles = read_cycles(); - NN_addscalar1d_f32(&actual, &a, 0.5); + nn_addscalar1d_f32(&actual, &a, 0.5); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals1d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals1d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -650,12 +650,12 @@ int main() { }; cycles = read_cycles(); - NN_addscalar2d_f32(&actual, &a, 0.5); + nn_addscalar2d_f32(&actual, &a, 0.5); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -685,12 +685,12 @@ int main() { }; cycles = read_cycles(); - NN_mul1d_f32(&actual, &a, &b); + nn_mul1d_f32(&actual, &a, &b); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals1d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals1d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -720,12 +720,12 @@ int main() { }; cycles = read_cycles(); - NN_mul2d_f32(&actual, &a, &b); + nn_mul2d_f32(&actual, &a, &b); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -750,12 +750,12 @@ int main() { }; cycles = read_cycles(); - NN_mulscalar1d_f32(&actual, &a, 0.5); + nn_mulscalar1d_f32(&actual, &a, 0.5); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals1d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals1d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -780,12 +780,12 @@ int main() { }; cycles = read_cycles(); - NN_mulscalar2d_f32(&actual, &a, 0.5); + nn_mulscalar2d_f32(&actual, &a, 0.5); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -815,12 +815,12 @@ int main() { }; cycles = read_cycles(); - NN_mm_f32(&actual, &x, &w); + nn_mm_f32(&actual, &x, &w); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -845,12 +845,12 @@ int main() { }; cycles = read_cycles(); - NN_max1d_f32(&actual, &x); + nn_max1d_f32(&actual, &x); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals0d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals0d_f32(&golden, &actual, 1e-4) ? 
"PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -875,12 +875,12 @@ int main() { }; cycles = read_cycles(); - NN_max2d_f32(&actual, &x); + nn_max2d_f32(&actual, &x); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals0d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals0d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -905,12 +905,12 @@ int main() { }; cycles = read_cycles(); - NN_min1d_f32(&actual, &x); + nn_min1d_f32(&actual, &x); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals0d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals0d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -935,12 +935,12 @@ int main() { }; cycles = read_cycles(); - NN_min2d_f32(&actual, &x); + nn_min2d_f32(&actual, &x); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals0d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals0d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -975,12 +975,12 @@ int main() { }; cycles = read_cycles(); - NN_addmm_f32(&actual, &x, &w, &b); + nn_addmm_f32(&actual, &x, &w, &b); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -1005,12 +1005,12 @@ int main() { }; cycles = read_cycles(); - NN_relu2d_f32(&actual, &x); + nn_relu2d_f32(&actual, &x); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } { @@ -1035,12 +1035,12 @@ int main() { }; cycles = read_cycles(); - NN_elu2d_f32(&actual, &x, 1.0); + nn_elu2d_f32(&actual, &x, 1.0); cycles = read_cycles() - cycles; - printf("%s (%lu cycles)\n", NN_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); + printf("%s (%lu cycles)\n", nn_equals2d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles); - // NN_free_tensor_data(actual); + // nn_free_tensor_data(actual); } } \ No newline at end of file