nn: support multi-categories classification #915

Merged
Merged 5 commits on Jan 4, 2025
4 changes: 2 additions & 2 deletions common/ann/ann_test.go
@@ -91,7 +91,7 @@ func (m *MNIST) openFile(path string) ([][]float32, []uint8, error) {
line := scanner.Text()
splits := strings.Split(line, " ")
// Parse label
label, err := util.ParseUInt8(splits[0])
label, err := util.ParseUInt[uint8](splits[0])
if err != nil {
return nil, nil, err
}
@@ -104,7 +104,7 @@ func (m *MNIST) openFile(path string) ([][]float32, []uint8, error) {
if err != nil {
return nil, nil, err
}
value, err := util.ParseFloat32(kv[1])
value, err := util.ParseFloat[float32](kv[1])
if err != nil {
return nil, nil, err
}
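The calls above switch from the old type-specific helpers (util.ParseUInt8, util.ParseFloat32) to generic ones. The helpers themselves are not part of this diff; a minimal sketch of what they might look like, with signatures inferred from the call sites and using golang.org/x/exp/constraints:

// Hypothetical sketch of the generic parsing helpers in common/util.
// The real implementations are not shown in this PR; the signatures are
// assumptions inferred from the call sites above.
package util

import (
	"strconv"

	"golang.org/x/exp/constraints"
)

// ParseUInt parses s as an unsigned integer and converts it to T.
// Note: parsing at 64 bits and narrowing afterwards can silently truncate;
// a real implementation would presumably pass the correct bit size.
func ParseUInt[T constraints.Unsigned](s string) (T, error) {
	v, err := strconv.ParseUint(s, 10, 64)
	return T(v), err
}

// ParseFloat parses s as a float and converts it to T (float32 or float64).
func ParseFloat[T constraints.Float](s string) (T, error) {
	v, err := strconv.ParseFloat(s, 64)
	return T(v), err
}
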
2 changes: 1 addition & 1 deletion common/dataset/dataset.go
@@ -67,7 +67,7 @@ func LoadIris() ([][]float32, []int, error) {
for i, row := range rows {
data[i] = make([]float32, 4)
for j, cell := range row[:4] {
data[i][j], err = util.ParseFloat32(cell)
data[i][j], err = util.ParseFloat[float32](cell)
if err != nil {
return nil, nil, err
}
19 changes: 18 additions & 1 deletion common/nn/functions.go
@@ -189,10 +189,27 @@ func ReLu(x *Tensor) *Tensor {
return apply(&relu{}, x)
}

func MSE(x, y *Tensor) *Tensor {
func Softmax(x *Tensor, axis int) *Tensor {
return apply(&softmax{axis: axis}, x)
}

func MeanSquareError(x, y *Tensor) *Tensor {
return Mean(Square(Sub(x, y)))
}

func SoftmaxCrossEntropy(x, y *Tensor) *Tensor {
if len(x.shape) != 2 {
panic("the first tensor must be 2-D")
}
if len(y.shape) != 1 {
panic("the second tensor must be 1-D")
}
if x.shape[0] != y.shape[0] {
panic("the first dimensions of the two tensors must be equal")
}
return apply(&softmaxCrossEntropy{}, x, y)
}

// BCEWithLogits is equivalent to:
//
// (1 + target) * math32.Log(1+math32.Exp(-prediction)) / 2 + (1 - target) * math32.Log(1+math32.Exp(prediction)) / 2
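For logits x with shape (N, C) and integer class labels t of length N, the new SoftmaxCrossEntropy computes the mean negative log-likelihood with the usual log-sum-exp stabilization; this is exactly what the forward pass added to common/nn/op.go below evaluates:

$$ L(x, t) = -\frac{1}{N} \sum_{i=1}^{N} \left( x_{i,t_i} - \Big( m_i + \log \sum_{j=1}^{C} e^{x_{i,j} - m_i} \Big) \right), \qquad m_i = \max_j x_{i,j} $$
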
20 changes: 17 additions & 3 deletions common/nn/layers.go
@@ -28,8 +28,8 @@ type linearLayer struct {

func NewLinear(in, out int) Layer {
return &linearLayer{
w: RandN(in, out).RequireGrad(),
b: RandN(out).RequireGrad(),
w: Rand(in, out).RequireGrad(),
b: Zeros(out).RequireGrad(),
}
}

@@ -62,7 +62,7 @@ type embeddingLayer struct {
func NewEmbedding(n int, shape ...int) Layer {
wShape := append([]int{n}, shape...)
return &embeddingLayer{
w: RandN(wShape...),
w: Rand(wShape...),
}
}

@@ -74,6 +74,20 @@ func (e *embeddingLayer) Forward(x *Tensor) *Tensor {
return Embedding(e.w, x)
}

type sigmoidLayer struct{}

func NewSigmoid() Layer {
return &sigmoidLayer{}
}

func (s *sigmoidLayer) Parameters() []*Tensor {
return nil
}

func (s *sigmoidLayer) Forward(x *Tensor) *Tensor {
return Sigmoid(x)
}

type reluLayer struct{}

func NewReLU() Layer {
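Judging from sigmoidLayer and linearLayer, a Layer only has to expose its parameters and a forward pass. The sketch below shows a hypothetical custom layer written in the same style; the shape of the Layer interface is an assumption inferred from this file, not part of the diff:

// The Layer interface is defined elsewhere in this package; from the layers
// above it presumably requires just these two methods (assumption):
//
//	Parameters() []*Tensor
//	Forward(x *Tensor) *Tensor
//
// squareLayer is a hypothetical stateless layer that squares its input
// element-wise, following the same pattern as sigmoidLayer.
type squareLayer struct{}

func NewSquare() Layer { return &squareLayer{} }

func (s *squareLayer) Parameters() []*Tensor { return nil }

func (s *squareLayer) Forward(x *Tensor) *Tensor { return Square(x) }
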
141 changes: 141 additions & 0 deletions common/nn/nn_test.go
@@ -0,0 +1,141 @@
// Copyright 2024 gorse Project Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nn

import (
"encoding/csv"
"github.com/chewxy/math32"
"github.com/stretchr/testify/assert"
"github.com/zhenghaoz/gorse/common/dataset"
"github.com/zhenghaoz/gorse/common/util"
"os"
"path/filepath"
"testing"
)

func TestLinearRegression(t *testing.T) {
x := Rand(100, 1)
y := Add(Rand(100, 1), NewScalar(5), Mul(NewScalar(2), x))

w := Zeros(1, 1)
b := Zeros(1)
predict := func(x *Tensor) *Tensor { return Add(MatMul(x, w), b) }

lr := float32(0.1)
for i := 0; i < 100; i++ {
yPred := predict(x)
loss := MeanSquareError(y, yPred)

w.grad = nil
b.grad = nil
loss.Backward()

w.sub(w.grad.mul(NewScalar(lr)))
b.sub(b.grad.mul(NewScalar(lr)))
}

assert.Equal(t, []int{1, 1}, w.shape)
assert.InDelta(t, float64(2), w.data[0], 0.5)
assert.Equal(t, []int{1}, b.shape)
assert.InDelta(t, float64(5), b.data[0], 0.5)
}

func TestNeuralNetwork(t *testing.T) {
x := Rand(100, 1)
y := Add(Rand(100, 1), Sin(Mul(x, NewScalar(2*math32.Pi))))

model := NewSequential(
NewLinear(1, 10),
NewSigmoid(),
NewLinear(10, 1),
)
NormalInit(model.(*Sequential).layers[0].(*linearLayer).w, 0, 0.01)
NormalInit(model.(*Sequential).layers[2].(*linearLayer).w, 0, 0.01)
optimizer := NewSGD(model.Parameters(), 0.2)

var l float32
for i := 0; i < 10000; i++ {
yPred := model.Forward(x)
loss := MeanSquareError(y, yPred)

optimizer.ZeroGrad()
loss.Backward()

optimizer.Step()
l = loss.data[0]
}
assert.InDelta(t, float64(0), l, 0.1)
}

func iris() (*Tensor, *Tensor, error) {
// Download dataset
path, err := dataset.DownloadAndUnzip("iris")
if err != nil {
return nil, nil, err
}
dataFile := filepath.Join(path, "iris.data")
// Load data
f, err := os.Open(dataFile)
if err != nil {
return nil, nil, err
}
reader := csv.NewReader(f)
rows, err := reader.ReadAll()
if err != nil {
return nil, nil, err
}
// Parse data
data := make([]float32, len(rows)*4)
target := make([]float32, len(rows))
types := make(map[string]int)
for i, row := range rows {
for j, cell := range row[:4] {
data[i*4+j], err = util.ParseFloat[float32](cell)
if err != nil {
return nil, nil, err
}
}
if _, exist := types[row[4]]; !exist {
types[row[4]] = len(types)
}
target[i] = float32(types[row[4]])
}
return NewTensor(data, len(rows), 4), NewTensor(target, len(rows)), nil
}

func TestIris(t *testing.T) {
x, y, err := iris()
assert.NoError(t, err)

model := NewSequential(
NewLinear(4, 100),
NewLinear(100, 100),
NewLinear(100, 3),
)
optimizer := NewAdam(model.Parameters(), 0.01)

var l float32
for i := 0; i < 1000; i++ {
yPred := model.Forward(x)
loss := SoftmaxCrossEntropy(yPred, y)

optimizer.ZeroGrad()
loss.Backward()

optimizer.Step()
l = loss.data[0]
}
assert.InDelta(t, float32(0), l, 0.1)
}
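
As the iris() helper shows, SoftmaxCrossEntropy expects its targets as a 1-D tensor of class indices stored as float32, not as one-hot vectors. A minimal call inside package nn, using only functions that appear in this PR, might look like this (hypothetical example, not part of the diff):

// Hypothetical example: 4 samples, 3 classes; targets are class indices
// 0..2 stored as float32 rather than one-hot vectors.
func exampleSoftmaxCrossEntropy() {
	logits := Rand(4, 3)
	targets := NewTensor([]float32{0, 2, 1, 2}, 4)
	loss := SoftmaxCrossEntropy(logits, targets)
	loss.Backward() // the label input receives a zero gradient
}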
72 changes: 72 additions & 0 deletions common/nn/op.go
@@ -715,6 +715,78 @@ func (r *relu) backward(dy *Tensor) []*Tensor {
return []*Tensor{dx}
}

type softmax struct {
base
axis int
}

func (s *softmax) String() string {
return "Softmax"
}

func (s *softmax) forward(inputs ...*Tensor) *Tensor {
x := inputs[0]
y := x.clone()
y.sub(x.max(s.axis, true))
y.exp()
y.div(y.sum(s.axis, true))
return y
}

func (s *softmax) backward(dy *Tensor) []*Tensor {
y := s.output
// gx = y * dy
gx := y.clone()
gx.mul(dy)
// sumdx = sum(y * dy) along the softmax axis
sumdx := gx.sum(s.axis, true)
// gx = y*dy - y*sum(y*dy): the softmax Jacobian-vector product
y.mul(sumdx)
gx.sub(y)
return []*Tensor{gx}
}

type softmaxCrossEntropy struct {
base
}

func (c *softmaxCrossEntropy) String() string {
return "SoftmaxCrossEntropy"
}

func (c *softmaxCrossEntropy) forward(inputs ...*Tensor) *Tensor {
x, t := inputs[0], inputs[1]
m := x.max(1, true)
s := x.clone().bSub(m) // x - m
s = s.exp() // exp(x - m)
s = s.sum(1, true) // sum(exp(x - m))
s.log() // log(sum(exp(x - m)))
m.add(s) // m + log(sum(exp(x - m)))
logP := x.clone().bSub(m) // x - (m + log(sum(exp(x - m))))
var crossEntropy float32
for i := 0; i < len(t.data); i++ {
crossEntropy -= logP.Get(i, int(t.data[i]))
}
crossEntropy /= float32(len(t.data))
return NewScalar(crossEntropy)
}

func (c *softmaxCrossEntropy) backward(dy *Tensor) []*Tensor {
x, t := c.inputs[0], c.inputs[1]
// gy *= 1/N
gy := dy.clone().mul(NewScalar(1 / float32(len(t.data))))
// y = softmax(x)
y := x.clone()
y.bSub(x.max(1, true))
y.exp()
y.bDiv(y.sum(1, true))
// convert to one-hot
oneHot := Zeros(x.shape...)
for i := 0; i < len(t.data); i++ {
oneHot.data[i*x.shape[1]+int(t.data[i])] = 1
}
// y = (y - t_onehot) * gy
y = y.sub(oneHot).mul(gy)
return []*Tensor{y, Zeros(t.shape...)}
}

type opHeap []op

func (h opHeap) Len() int {
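In the notation of the code above (dy is the incoming gradient, N = len(t.data)), the two backward passes implement the standard gradients.

Softmax, with y = softmax(x) along the chosen axis:

$$ \frac{\partial L}{\partial x} = y \odot dy \;-\; y \odot \sum_{\text{axis}} (y \odot dy) $$

SoftmaxCrossEntropy, where the label tensor t receives a zero gradient:

$$ \frac{\partial L}{\partial x_{i,j}} = \frac{dy}{N} \left( \operatorname{softmax}(x_i)_j - \mathbb{1}[j = t_i] \right) $$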