From d5d22de38cb02f806fc8116254150942dee71d19 Mon Sep 17 00:00:00 2001
From: "-T.K.-"
Date: Fri, 13 Dec 2024 15:58:27 -0800
Subject: [PATCH] ADD: add 1d softmax

---
 CMakeLists.txt                         |  1 +
 converter/src/torchconverter/tracer.py |  4 ++++
 src/cpu/softmax.c                      | 30 ++++++++++++++++++++++++++
 tests/generate_test.py                 |  3 +++
 tests/src/generated.c                  | 30 ++++++++++++++++++++++++++
 5 files changed, 68 insertions(+)
 create mode 100644 src/cpu/softmax.c

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 221efdf..a41172b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -102,6 +102,7 @@ set(cpu_impl
   ./src/cpu/mul.c
   ./src/cpu/mulscalar.c
   ./src/cpu/relu.c
+  ./src/cpu/softmax.c
   ./src/cpu/tanh.c
 )
 
diff --git a/converter/src/torchconverter/tracer.py b/converter/src/torchconverter/tracer.py
index 340ebbf..300548c 100644
--- a/converter/src/torchconverter/tracer.py
+++ b/converter/src/torchconverter/tracer.py
@@ -322,6 +322,10 @@ def handle_call_module(self, n: torch.fx.node.Node, out: torch.Tensor):
         elif type(module) == torch.nn.Tanh:
             self.add_uninitialized_tensor(layer_name, out)
             self.add_forward_call("nn_tanh{dim}d_{dtype}", out, layer_name, input_names)
+
+        elif type(module) == torch.nn.Softmax:
+            self.add_uninitialized_tensor(layer_name, out)
+            self.add_forward_call("nn_softmax{dim}d_{dtype}", out, layer_name, input_names)
 
         # Linear Layers
         elif type(module) == torch.nn.Linear:
diff --git a/src/cpu/softmax.c b/src/cpu/softmax.c
new file mode 100644
index 0000000..247c727
--- /dev/null
+++ b/src/cpu/softmax.c
@@ -0,0 +1,30 @@
+#include "nn.h"
+
+
+__attribute__((weak)) void nn_softmax1d_f16(Tensor1D_F16 *y, const Tensor1D_F16 *x) {
+  nn_assert(y->shape[0] == x->shape[0], "Cannot apply softmax to tensors of different shapes");
+
+  size_t n = y->shape[0];
+  float sum = 0.0f;
+  for (size_t i = 0; i < n; i += 1) {
+    sum += expf(as_f32(x->data[i]));
+  }
+
+  for (size_t i = 0; i < n; i += 1) {
+    y->data[i] = as_f16(expf(as_f32(x->data[i])) / sum);
+  }
+}
+
+__attribute__((weak)) void nn_softmax1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x) {
+  nn_assert(y->shape[0] == x->shape[0], "Cannot apply softmax to tensors of different shapes");
+
+  size_t n = y->shape[0];
+  float sum = 0.0f;
+  for (size_t i = 0; i < n; i += 1) {
+    sum += expf(x->data[i]);
+  }
+
+  for (size_t i = 0; i < n; i += 1) {
+    y->data[i] = expf(x->data[i]) / sum;
+  }
+}
diff --git a/tests/generate_test.py b/tests/generate_test.py
index 182bc3b..96abd38 100644
--- a/tests/generate_test.py
+++ b/tests/generate_test.py
@@ -300,6 +300,9 @@ def generate(self, out_file: str):
     # Tanh
     t.add_test("nn_tanh2d_f32", lambda x: torch.nn.functional.tanh(x), [("x", t.rand((7, 7))) ])
 
+    # Softmax
+    t.add_test("nn_softmax1d_f32", lambda x: torch.nn.functional.softmax(x, dim=0), [("x", t.rand((7, )))])
+
     t.generate(out_file)
diff --git a/tests/src/generated.c b/tests/src/generated.c
index 74c86ee..06b2549 100644
--- a/tests/src/generated.c
+++ b/tests/src/generated.c
@@ -1103,4 +1103,34 @@ int main() {
     // nn_free_tensor_data(actual);
   }
 
+  {
+    printf("nn_softmax1d_f32: ");
+
+    // [ 1.463511 -2.0732946 2.5087662 -0.44954896 0.43284953 -3.458044 -4.46862
+    Tensor1D_F32 x = {
+      .shape = { 7 },
+      .data = (float *)((uint8_t[]){ 0x54,0x54,0xbb,0x3f,0xdc,0xb0,0x4,0xc0,0xa0,0x8f,0x20,0x40,0x48,0x2b,0xe6,0xbe,0x74,0x9e,0xdd,0x3e,0x98,0x50,0x5d,0xc0,0x0,0xff,0x8e,0xc0 })
+    };
+
+
+    // [2.2791658e-01 6.6337758e-03 6.4822310e-01 3.3646863e-02 8.1313998e-02 1.661008
+    Tensor1D_F32 golden = {
+      .shape = { 7 },
+      .data = (float *)((uint8_t[]){ 0xf7,0x62,0x69,0x3e,0x25,0x60,0xd9,0x3b,0xf3,0xf1,0x25,0x3f,0x4b,0xd1,0x9,0x3d,0xf4,0x87,0xa6,0x3d,0x2f,0xb6,0xd9,0x3a,0x32,0x7f,0x1e,0x3a })
+    };
+    //
+    Tensor1D_F32 actual = {
+      .shape = { 7 },
+      .data = (float *)malloc(sizeof(float) * 7)
+    };
+
+    cycles = read_cycles();
+    nn_softmax1d_f32(&actual, &x);
+    cycles = read_cycles() - cycles;
+    printf("%s (%lu cycles)\n", nn_equals1d_f32(&golden, &actual, 1e-4) ? "PASS" : "FAIL", cycles);
+
+
+    // nn_free_tensor_data(actual);
+  }
+
 }
\ No newline at end of file
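
Not part of the patch itself: a minimal usage sketch of the new f32 kernel, for reviewers who want to try it outside the generated test harness. It relies only on names that appear in the diff above (Tensor1D_F32, its .shape/.data fields, nn_softmax1d_f32); the example logits and the assumption that "nn.h" is on the include path are illustrative, not taken from the repository.

#include <stdio.h>
#include <stdlib.h>

#include "nn.h"

int main() {
  // Any real-valued logits work; softmax normalizes them into probabilities.
  float logits[3] = { 1.0f, 2.0f, 3.0f };

  Tensor1D_F32 x = { .shape = { 3 }, .data = logits };
  Tensor1D_F32 y = { .shape = { 3 }, .data = (float *)malloc(sizeof(float) * 3) };

  nn_softmax1d_f32(&y, &x);

  // Output sums to 1: roughly 0.0900, 0.2447, 0.6652 for these inputs.
  for (size_t i = 0; i < 3; i += 1) {
    printf("%f\n", y.data[i]);
  }

  free(y.data);
  return 0;
}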