nn: support multi-categories classification #915

Merged
Merged 5 commits on Jan 4, 2025
4 changes: 2 additions & 2 deletions common/ann/ann_test.go
@@ -91,7 +91,7 @@ func (m *MNIST) openFile(path string) ([][]float32, []uint8, error) {
line := scanner.Text()
splits := strings.Split(line, " ")
// Parse label
label, err := util.ParseUInt8(splits[0])
label, err := util.ParseUInt[uint8](splits[0])
if err != nil {
return nil, nil, err
}
@@ -104,7 +104,7 @@ func (m *MNIST) openFile(path string) ([][]float32, []uint8, error) {
if err != nil {
return nil, nil, err
}
value, err := util.ParseFloat32(kv[1])
value, err := util.ParseFloat[float32](kv[1])
if err != nil {
return nil, nil, err
}
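The calls above switch from the old type-specific helpers (util.ParseUInt8, util.ParseFloat32) to generic ones. The helpers themselves are not part of this diff; a minimal sketch of what they might look like, with signatures inferred from the call sites and using golang.org/x/exp/constraints:

// Hypothetical sketch of the generic parsing helpers in common/util.
// The real implementations are not shown in this PR; the signatures are
// assumptions inferred from the call sites above.
package util

import (
	"strconv"

	"golang.org/x/exp/constraints"
)

// ParseUInt parses s as an unsigned integer and converts it to T.
// Note: parsing at 64 bits and narrowing afterwards can silently truncate;
// a real implementation would presumably pass the correct bit size.
func ParseUInt[T constraints.Unsigned](s string) (T, error) {
	v, err := strconv.ParseUint(s, 10, 64)
	return T(v), err
}

// ParseFloat parses s as a float and converts it to T (float32 or float64).
func ParseFloat[T constraints.Float](s string) (T, error) {
	v, err := strconv.ParseFloat(s, 64)
	return T(v), err
}
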
2 changes: 1 addition & 1 deletion common/dataset/dataset.go
@@ -67,7 +67,7 @@ func LoadIris() ([][]float32, []int, error) {
for i, row := range rows {
data[i] = make([]float32, 4)
for j, cell := range row[:4] {
data[i][j], err = util.ParseFloat32(cell)
data[i][j], err = util.ParseFloat[float32](cell)
if err != nil {
return nil, nil, err
}
19 changes: 18 additions & 1 deletion common/nn/functions.go
@@ -189,10 +189,27 @@ func ReLu(x *Tensor) *Tensor {
return apply(&relu{}, x)
}

func MSE(x, y *Tensor) *Tensor {
func Softmax(x *Tensor, axis int) *Tensor {
return apply(&softmax{axis: axis}, x)
}

func MeanSquareError(x, y *Tensor) *Tensor {
return Mean(Square(Sub(x, y)))
}

func SoftmaxCrossEntropy(x, y *Tensor) *Tensor {
if len(x.shape) != 2 {
panic("the first tensor must be 2-D")
}
if len(y.shape) != 1 {
panic("the second tensor must be 1-D")
}
if x.shape[0] != y.shape[0] {
panic("the first dimensions of the two tensors must be equal")
}
return apply(&softmaxCrossEntropy{}, x, y)
}

// BCEWithLogits is equivalent to:
//
// (1 + target) * math32.Log(1+math32.Exp(-prediction)) / 2 + (1 - target) * math32.Log(1+math32.Exp(prediction)) / 2
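For logits x with shape (N, C) and integer class labels t of length N, the new SoftmaxCrossEntropy computes the mean negative log-likelihood with the usual log-sum-exp stabilization; this is exactly what the forward pass added to common/nn/op.go below evaluates:

$$ L(x, t) = -\frac{1}{N} \sum_{i=1}^{N} \left( x_{i,t_i} - \Big( m_i + \log \sum_{j=1}^{C} e^{x_{i,j} - m_i} \Big) \right), \qquad m_i = \max_j x_{i,j} $$
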
20 changes: 17 additions & 3 deletions common/nn/layers.go
@@ -28,8 +28,8 @@ type linearLayer struct {

func NewLinear(in, out int) Layer {
return &linearLayer{
w: RandN(in, out).RequireGrad(),
b: RandN(out).RequireGrad(),
w: Rand(in, out).RequireGrad(),
b: Zeros(out).RequireGrad(),
}
}

@@ -62,7 +62,7 @@ type embeddingLayer struct {
func NewEmbedding(n int, shape ...int) Layer {
wShape := append([]int{n}, shape...)
return &embeddingLayer{
w: RandN(wShape...),
w: Rand(wShape...),
}
}

@@ -74,6 +74,20 @@ func (e *embeddingLayer) Forward(x *Tensor) *Tensor {
return Embedding(e.w, x)
}

type sigmoidLayer struct{}

func NewSigmoid() Layer {
return &sigmoidLayer{}
}

func (s *sigmoidLayer) Parameters() []*Tensor {
return nil
}

func (s *sigmoidLayer) Forward(x *Tensor) *Tensor {
return Sigmoid(x)
}

type reluLayer struct{}

func NewReLU() Layer {
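Judging from sigmoidLayer and linearLayer, a Layer only has to expose its parameters and a forward pass. The sketch below shows a hypothetical custom layer written in the same style; the shape of the Layer interface is an assumption inferred from this file, not part of the diff:

// The Layer interface is defined elsewhere in this package; from the layers
// above it presumably requires just these two methods (assumption):
//
//	Parameters() []*Tensor
//	Forward(x *Tensor) *Tensor
//
// squareLayer is a hypothetical stateless layer that squares its input
// element-wise, following the same pattern as sigmoidLayer.
type squareLayer struct{}

func NewSquare() Layer { return &squareLayer{} }

func (s *squareLayer) Parameters() []*Tensor { return nil }

func (s *squareLayer) Forward(x *Tensor) *Tensor { return Square(x) }
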
141 changes: 141 additions & 0 deletions common/nn/nn_test.go
@@ -0,0 +1,141 @@
// Copyright 2024 gorse Project Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nn

import (
"encoding/csv"
"github.com/chewxy/math32"
"github.com/stretchr/testify/assert"
"github.com/zhenghaoz/gorse/common/dataset"
"github.com/zhenghaoz/gorse/common/util"
"os"
"path/filepath"
"testing"
)

func TestLinearRegression(t *testing.T) {
x := Rand(100, 1)
y := Add(Rand(100, 1), NewScalar(5), Mul(NewScalar(2), x))

w := Zeros(1, 1)
b := Zeros(1)
predict := func(x *Tensor) *Tensor { return Add(MatMul(x, w), b) }

lr := float32(0.1)
for i := 0; i < 100; i++ {
yPred := predict(x)
loss := MeanSquareError(y, yPred)

w.grad = nil
b.grad = nil
loss.Backward()

w.sub(w.grad.mul(NewScalar(lr)))
b.sub(b.grad.mul(NewScalar(lr)))
}

assert.Equal(t, []int{1, 1}, w.shape)
assert.InDelta(t, float64(2), w.data[0], 0.5)
assert.Equal(t, []int{1}, b.shape)
assert.InDelta(t, float64(5), b.data[0], 0.5)
}

func TestNeuralNetwork(t *testing.T) {
x := Rand(100, 1)
y := Add(Rand(100, 1), Sin(Mul(x, NewScalar(2*math32.Pi))))

model := NewSequential(
NewLinear(1, 10),
NewSigmoid(),
NewLinear(10, 1),
)
NormalInit(model.(*Sequential).layers[0].(*linearLayer).w, 0, 0.01)
NormalInit(model.(*Sequential).layers[2].(*linearLayer).w, 0, 0.01)
optimizer := NewSGD(model.Parameters(), 0.2)

var l float32
for i := 0; i < 10000; i++ {
yPred := model.Forward(x)
loss := MeanSquareError(y, yPred)

optimizer.ZeroGrad()
loss.Backward()

optimizer.Step()
l = loss.data[0]
}
assert.InDelta(t, float64(0), l, 0.1)
}

func iris() (*Tensor, *Tensor, error) {
// Download dataset
path, err := dataset.DownloadAndUnzip("iris")
if err != nil {
return nil, nil, err
}
dataFile := filepath.Join(path, "iris.data")
// Load data
f, err := os.Open(dataFile)
if err != nil {
return nil, nil, err
}
reader := csv.NewReader(f)
rows, err := reader.ReadAll()
if err != nil {
return nil, nil, err
}
// Parse data
data := make([]float32, len(rows)*4)
target := make([]float32, len(rows))
types := make(map[string]int)
for i, row := range rows {
for j, cell := range row[:4] {
data[i*4+j], err = util.ParseFloat[float32](cell)
if err != nil {
return nil, nil, err
}
}
if _, exist := types[row[4]]; !exist {
types[row[4]] = len(types)
}
target[i] = float32(types[row[4]])
}
return NewTensor(data, len(rows), 4), NewTensor(target, len(rows)), nil
}

func TestIris(t *testing.T) {
x, y, err := iris()
assert.NoError(t, err)

model := NewSequential(
NewLinear(4, 100),
NewLinear(100, 100),
NewLinear(100, 3),
)
optimizer := NewAdam(model.Parameters(), 0.01)

var l float32
for i := 0; i < 1000; i++ {
yPred := model.Forward(x)
loss := SoftmaxCrossEntropy(yPred, y)

optimizer.ZeroGrad()
loss.Backward()

optimizer.Step()
l = loss.data[0]
}
assert.InDelta(t, float32(0), l, 0.1)
}
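
As the iris() helper shows, SoftmaxCrossEntropy expects its targets as a 1-D tensor of class indices stored as float32, not as one-hot vectors. A minimal call inside package nn, using only functions that appear in this PR, might look like this (hypothetical example, not part of the diff):

// Hypothetical example: 4 samples, 3 classes; targets are class indices
// 0..2 stored as float32 rather than one-hot vectors.
func exampleSoftmaxCrossEntropy() {
	logits := Rand(4, 3)
	targets := NewTensor([]float32{0, 2, 1, 2}, 4)
	loss := SoftmaxCrossEntropy(logits, targets)
	loss.Backward() // the label input receives a zero gradient
}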
72 changes: 72 additions & 0 deletions common/nn/op.go
@@ -715,6 +715,78 @@ func (r *relu) backward(dy *Tensor) []*Tensor {
return []*Tensor{dx}
}

type softmax struct {
base
axis int
}

func (s *softmax) String() string {
return "Softmax"
}

func (s *softmax) forward(inputs ...*Tensor) *Tensor {
x := inputs[0]
y := x.clone()
y.sub(x.max(s.axis, true))
y.exp()
y.div(y.sum(s.axis, true))
return y
}

func (s *softmax) backward(dy *Tensor) []*Tensor {
y := s.output
// gx = y * dy
gx := y.clone()
gx.mul(dy)
// sumdx = sum(y * dy) along the softmax axis
sumdx := gx.sum(s.axis, true)
// gx = y*dy - y*sum(y*dy): the softmax Jacobian-vector product
y.mul(sumdx)
gx.sub(y)
return []*Tensor{gx}
}

type softmaxCrossEntropy struct {
base
}

func (c *softmaxCrossEntropy) String() string {
return "SoftmaxCrossEntropy"
}

func (c *softmaxCrossEntropy) forward(inputs ...*Tensor) *Tensor {
x, t := inputs[0], inputs[1]
m := x.max(1, true)
s := x.clone().bSub(m) // x - m
s = s.exp() // exp(x - m)
s = s.sum(1, true) // sum(exp(x - m))
s.log() // log(sum(exp(x - m)))
m.add(s) // m + log(sum(exp(x - m)))
logP := x.clone().bSub(m) // x - (m + log(sum(exp(x - m))))
var crossEntropy float32
for i := 0; i < len(t.data); i++ {
crossEntropy -= logP.Get(i, int(t.data[i]))
}
crossEntropy /= float32(len(t.data))
return NewScalar(crossEntropy)
}

func (c *softmaxCrossEntropy) backward(dy *Tensor) []*Tensor {
x, t := c.inputs[0], c.inputs[1]
// gy *= 1/N
gy := dy.clone().mul(NewScalar(1 / float32(len(t.data))))
// y = softmax(x)
y := x.clone()
y.bSub(x.max(1, true))
y.exp()
y.bDiv(y.sum(1, true))
// convert to one-hot
oneHot := Zeros(x.shape...)
for i := 0; i < len(t.data); i++ {
oneHot.data[i*x.shape[1]+int(t.data[i])] = 1
}
// y = (y - t_onehot) * gy
y = y.sub(oneHot).mul(gy)
return []*Tensor{y, Zeros(t.shape...)}
}

type opHeap []op

func (h opHeap) Len() int {
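In the notation of the code above (dy is the incoming gradient, N = len(t.data)), the two backward passes implement the standard gradients.

Softmax, with y = softmax(x) along the chosen axis:

$$ \frac{\partial L}{\partial x} = y \odot dy \;-\; y \odot \sum_{\text{axis}} (y \odot dy) $$

SoftmaxCrossEntropy, where the label tensor t receives a zero gradient:

$$ \frac{\partial L}{\partial x_{i,j}} = \frac{dy}{N} \left( \operatorname{softmax}(x_i)_j - \mathbb{1}[j = t_i] \right) $$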