Prototype SAC
phisn committed Apr 27, 2024
1 parent 64de69d commit e8d5127
Showing 8 changed files with 393 additions and 173 deletions.
68 changes: 54 additions & 14 deletions packages/learning/src/main.ts
@@ -1,26 +1,68 @@
import * as tf from "@tensorflow/tfjs"
// 1 -> 0
// 0 -> -1
// -1 -> 1

// Define a model for linear regression.
const model = tf.sequential()
model.add(tf.layers.dense({ units: 1, inputShape: [1] }))
import { SoftActorCritic } from "./soft-actor-critic/soft-actor-critic"

// Prepare the model for training: Specify the loss and the optimizer.
model.compile({ loss: "meanSquaredError", optimizer: "adam" })
function getReward(got: number, expected: number) {
    const gotRounded = Math.round(got)

// Generate some synthetic data for training.
const xs = tf.tensor2d([1, 2, 3, 4, 5, 6, 7], [7, 1])
const ys = tf.tensor2d([2, 4, 6, 8, 10, 12, 14], [7, 1])
    if (gotRounded === expected) {
        return 0
    }

// Train the model using the data.
model.fit(xs, ys, { epochs: 3000 * 7 }).then(() => {
;(model.predict(tf.tensor2d([1, 2, 3, 4], [4, 1])) as tf.Tensor).print()
    if (gotRounded === 0) {
        return expected === -1 ? 1 : -1
    }

    if (gotRounded === 1) {
        return expected === 0 ? 1 : -1
    }

    return expected === 1 ? 1 : -1
}

const observationSize = 8
const actionSize = 1

const observations = [
    [[-1, -1, -1, -1, -1, -1, -1, -1], [-1]],
    [[0, 0, 0, 0, 0, 0, 0, 0], [0]],
    [[1, 1, 1, 1, 1, 1, 1, 1], [1]],
    [[-1, 0, 1, 0, -1, 0, 1, 0], [-1]],
    [[0, 1, 0, -1, 0, 1, 0, -1], [0]],
    [[1, 0, -1, 0, 1, 0, -1, 0], [1]],
    [[-1, 1, -1, 1, -1, 1, -1, 1], [-1]],
    [[1, -1, 1, -1, 1, -1, 1, -1], [1]],
]

const sac = new SoftActorCritic({
    mlpSpec: {
        sizes: [64, 64],
        activation: "relu",
        outputActivation: "relu",
    },
    actionSize,
    observationSize,
    maxEpisodeLength: 1000,
    bufferSize: 10000,
    batchSize: 64,
    updateAfter: 1000,
    updateEvery: 50,
    learningRate: 0.001,
    alpha: 0.2,
    gamma: 0.99,
})

const x = sac.act([0, 0, 0, 0, 0, 0, 0, 0])
x.print()
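
Not part of this commit: a minimal evaluation sketch showing how the pieces above could be wired together. It assumes sac.act returns a rank-1 tensor holding the single action value, as the print call above suggests.

let totalReward = 0

for (const [observation, [expected]] of observations) {
    const action = sac.act(observation)
    const got = action.dataSync()[0]

    totalReward += getReward(got, expected)
    action.dispose()
}

console.log(`average reward over the fixed observations: ${totalReward / observations.length}`)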

/*
import { WorldModel } from "runtime/proto/world"
import { Game } from "./game/game"
import { GameLoop } from "./game/game-loop"
import { GameInstanceType, GameSettings } from "./game/game-settings"
import * as tf from '@tensorflow/tfjs';
function base64ToBytes(base64: string) {
return Uint8Array.from(atob(base64), c => c.charCodeAt(0))
@@ -43,5 +85,3 @@ try {
console.error(e)
}
*/

console.log("test")
159 changes: 0 additions & 159 deletions packages/learning/src/sac/sac.ts

This file was deleted.

71 changes: 71 additions & 0 deletions packages/learning/src/soft-actor-critic/actor.ts
@@ -0,0 +1,71 @@
import * as tf from "@tensorflow/tfjs"
import { GaussianLikelihood } from "./gaussian-likelihood"
import { MlpSpecification, mlp } from "./mlp"

const LOG_STD_MIN = -20
const LOG_STD_MAX = 2

export class Actor extends tf.layers.Layer {
    private gaussianLikelihood: tf.layers.Layer

    private net: tf.Sequential
    private meanLayer: tf.layers.Layer
    private stdevLayer: tf.layers.Layer

    constructor(observationSize: number, actionSize: number, mlpSpec: MlpSpecification) {
        super()

        this.net = mlp({
            ...mlpSpec,
            sizes: [observationSize, ...mlpSpec.sizes],
        })

        this.meanLayer = tf.layers.dense({
            units: actionSize,
        })

        this.stdevLayer = tf.layers.dense({
            units: actionSize,
        })

        this.gaussianLikelihood = new GaussianLikelihood()
    }

    call(x: tf.Tensor<tf.Rank>): tf.Tensor<tf.Rank>[] {
        x = this.net.apply(x) as tf.Tensor<tf.Rank>
        const mu = this.meanLayer.apply(x) as tf.Tensor<tf.Rank>

        let logSigma = this.stdevLayer.apply(x) as tf.Tensor<tf.Rank>
        logSigma = tf.clipByValue(logSigma, LOG_STD_MIN, LOG_STD_MAX)
        const sigma = tf.exp(logSigma)

        // Reparameterized sample u = mu + sigma * eps, taken before squashing
        const preTanh = tf.add(mu, tf.mul(tf.randomNormal(mu.shape), sigma))

        // Log-likelihood of the unsquashed sample under N(mu, sigma)
        let logpPi = this.gaussianLikelihood.apply([preTanh, mu, logSigma]) as tf.Tensor<tf.Rank>

        // Tanh-squashing correction: log(1 - tanh(u)^2) = 2 * (log 2 - u - softplus(-2u))
        logpPi = tf.sub(
            logpPi,
            tf.sum(
                tf.mul(
                    2,
                    tf.sub(tf.sub(Math.log(2), preTanh), tf.softplus(tf.mul(-2, preTanh))),
                ),
                1,
            ),
        )

        const action = tf.tanh(preTanh)

        return [action, logpPi]
    }

    get trainableWeights(): tf.LayerVariable[] {
        return [
            ...this.net.trainableWeights,
            ...this.meanLayer.trainableWeights,
            ...this.stdevLayer.trainableWeights,
        ]
    }

    static get className() {
        return "Actor"
    }
}

tf.serialization.registerClass(Actor)
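
A small check, not part of this commit, of the identity behind the correction term in Actor.call: for a pre-squash sample u, log(1 - tanh(u)^2) = 2 * (log 2 - u - softplus(-2u)), which is why subtracting that sum from the Gaussian log-likelihood gives the log-density of the tanh-squashed action.

import * as tf from "@tensorflow/tfjs"

const u = tf.tensor1d([-5, -1, 0, 1, 5])

// Both expressions should print (numerically) identical values
const direct = tf.log(tf.sub(1, tf.square(tf.tanh(u))))
const stable = tf.mul(2, tf.sub(tf.sub(Math.log(2), u), tf.softplus(tf.mul(-2, u))))

direct.print()
stable.print()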
32 changes: 32 additions & 0 deletions packages/learning/src/soft-actor-critic/critic.ts
@@ -0,0 +1,32 @@
import * as tf from "@tensorflow/tfjs"
import { MlpSpecification, mlp } from "./mlp"

export class Critic extends tf.layers.Layer {
    private q: tf.Sequential

    constructor(observationSize: number, actionSize: number, mlpSpec: MlpSpecification) {
        super()

        this.q = mlp({
            ...mlpSpec,
            sizes: [observationSize + actionSize, ...mlpSpec.sizes],
            outputActivation: undefined,
        })
    }

    call([obs, act]: tf.Tensor<tf.Rank>[]): tf.Tensor<tf.Rank> {
        // Q(s, a): feed the concatenated observation and action through the MLP
        // and squeeze the trailing unit dimension to get a rank-1 batch of values
        let x = tf.concat([obs, act], 1)
        x = this.q.apply(x) as tf.Tensor<tf.Rank>
        return tf.squeeze(x, [1])
    }

    get trainableWeights(): tf.LayerVariable[] {
        return this.q.trainableWeights
    }

    static get className() {
        return "Critic"
    }
}

tf.serialization.registerClass(Critic)
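
The update logic lives in soft-actor-critic.ts, which is not rendered on this page; the sketch below only illustrates how a pair of target critics typically feeds the clipped double-Q soft Bellman target in SAC. The function name and parameters are assumptions, not code from this commit.

import * as tf from "@tensorflow/tfjs"
import { Critic } from "./critic"

// Hypothetical helper: clipped double-Q soft Bellman target
// y = r + gamma * (1 - done) * (min(Q1'(s', a'), Q2'(s', a')) - alpha * log pi(a'|s'))
function sacTarget(
    q1Target: Critic,
    q2Target: Critic,
    nextObs: tf.Tensor,
    nextAction: tf.Tensor,
    nextLogpPi: tf.Tensor,
    reward: tf.Tensor,
    done: tf.Tensor,
    gamma: number,
    alpha: number,
): tf.Tensor {
    const q1 = q1Target.call([nextObs, nextAction])
    const q2 = q2Target.call([nextObs, nextAction])

    const softValue = tf.sub(tf.minimum(q1, q2), tf.mul(alpha, nextLogpPi))
    return tf.add(reward, tf.mul(gamma, tf.mul(tf.sub(1, done), softValue)))
}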
25 changes: 25 additions & 0 deletions packages/learning/src/soft-actor-critic/gaussian-likelihood.ts
@@ -0,0 +1,25 @@
import * as tf from "@tensorflow/tfjs"

export class GaussianLikelihood extends tf.layers.Layer {
    computeOutputShape(inputShape: tf.Shape[]): tf.Shape | tf.Shape[] {
        return [inputShape[0][0], 1]
    }

    call([x, mu, logstd]: tf.Tensor<tf.Rank>[]): tf.Tensor<tf.Rank> {
        // Sum of per-dimension log densities of a diagonal Gaussian:
        // -0.5 * (((x - mu) / sigma)^2 + 2 * logstd + log(2 * pi))
        const preSum = tf.mul(
            -0.5,
            tf.add(
                tf.pow(tf.div(tf.sub(x, mu), tf.exp(logstd)), 2),
                tf.add(tf.mul(2, logstd), Math.log(2 * Math.PI)),
            ),
        )

        return tf.sum(preSum, 1)
    }

    static get className() {
        return "GaussianLikelihood"
    }
}

tf.serialization.registerClass(GaussianLikelihood)
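
A quick sanity check, not part of this commit, for the formula above: with x = mu and logstd = 0, every dimension contributes -0.5 * ln(2*pi) ≈ -0.9189, so an 8-dimensional input should sum to roughly -7.35.

import * as tf from "@tensorflow/tfjs"
import { GaussianLikelihood } from "./gaussian-likelihood"

const likelihood = new GaussianLikelihood()

// Invoke the layer's call directly on concrete tensors
const x = tf.zeros([1, 8])
const mu = tf.zeros([1, 8])
const logstd = tf.zeros([1, 8])

likelihood.call([x, mu, logstd]).print() // expected: about -7.35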
(Diffs for the remaining changed files are not rendered on this page.)