Prototype SAC
phisn committed Apr 27, 2024
1 parent 64de69d commit e8d5127
Showing 8 changed files with 393 additions and 173 deletions.
68 changes: 54 additions & 14 deletions packages/learning/src/main.ts
@@ -1,26 +1,68 @@
import * as tf from "@tensorflow/tfjs"
// 1 -> 0
// 0 -> -1
// -1 -> 1

// Define a model for linear regression.
const model = tf.sequential()
model.add(tf.layers.dense({ units: 1, inputShape: [1] }))
import { SoftActorCritic } from "./soft-actor-critic/soft-actor-critic"

// Prepare the model for training: Specify the loss and the optimizer.
model.compile({ loss: "meanSquaredError", optimizer: "adam" })
function getReward(got: number, expected: number) {
    const gotRounded = Math.round(got)

// Generate some synthetic data for training.
const xs = tf.tensor2d([1, 2, 3, 4, 5, 6, 7], [7, 1])
const ys = tf.tensor2d([2, 4, 6, 8, 10, 12, 14], [7, 1])
    if (gotRounded === expected) {
        return 0
    }

// Train the model using the data.
model.fit(xs, ys, { epochs: 3000 * 7 }).then(() => {
;(model.predict(tf.tensor2d([1, 2, 3, 4], [4, 1])) as tf.Tensor).print()
    if (gotRounded === 0) {
        return expected === -1 ? 1 : -1
    }

    if (gotRounded === 1) {
        return expected === 0 ? 1 : -1
    }

    return expected === 1 ? 1 : -1
}

const observationSize = 8
const actionSize = 1

const observations = [
    [[-1, -1, -1, -1, -1, -1, -1, -1], [-1]],
    [[0, 0, 0, 0, 0, 0, 0, 0], [0]],
    [[1, 1, 1, 1, 1, 1, 1, 1], [1]],
    [[-1, 0, 1, 0, -1, 0, 1, 0], [-1]],
    [[0, 1, 0, -1, 0, 1, 0, -1], [0]],
    [[1, 0, -1, 0, 1, 0, -1, 0], [1]],
    [[-1, 1, -1, 1, -1, 1, -1, 1], [-1]],
    [[1, -1, 1, -1, 1, -1, 1, -1], [1]],
]

const sac = new SoftActorCritic({
    mlpSpec: {
        sizes: [64, 64],
        activation: "relu",
        outputActivation: "relu",
    },
    actionSize,
    observationSize,
    maxEpisodeLength: 1000,
    bufferSize: 10000,
    batchSize: 64,
    updateAfter: 1000,
    updateEvery: 50,
    learningRate: 0.001,
    alpha: 0.2,
    gamma: 0.99,
})

const x = sac.act([0, 0, 0, 0, 0, 0, 0, 0])
x.print()
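
Not part of this commit: a minimal evaluation sketch showing how the pieces above could be wired together. It assumes sac.act returns a rank-1 tensor holding the single action value, as the print call above suggests.

let totalReward = 0

for (const [observation, [expected]] of observations) {
    const action = sac.act(observation)
    const got = action.dataSync()[0]

    totalReward += getReward(got, expected)
    action.dispose()
}

console.log(`average reward over the fixed observations: ${totalReward / observations.length}`)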

/*
import { WorldModel } from "runtime/proto/world"
import { Game } from "./game/game"
import { GameLoop } from "./game/game-loop"
import { GameInstanceType, GameSettings } from "./game/game-settings"
import * as tf from '@tensorflow/tfjs';
function base64ToBytes(base64: string) {
return Uint8Array.from(atob(base64), c => c.charCodeAt(0))
@@ -43,5 +85,3 @@ try {
console.error(e)
}
*/

console.log("test")
159 changes: 0 additions & 159 deletions packages/learning/src/sac/sac.ts

This file was deleted.

71 changes: 71 additions & 0 deletions packages/learning/src/soft-actor-critic/actor.ts
@@ -0,0 +1,71 @@
import * as tf from "@tensorflow/tfjs"
import { GaussianLikelihood } from "./gaussian-likelihood"
import { MlpSpecification, mlp } from "./mlp"

const LOG_STD_MIN = -20
const LOG_STD_MAX = 2

export class Actor extends tf.layers.Layer {
    private gaussianLikelihood: tf.layers.Layer

    private net: tf.Sequential
    private meanLayer: tf.layers.Layer
    private stdevLayer: tf.layers.Layer

    constructor(observationSize: number, actionSize: number, mlpSpec: MlpSpecification) {
        super()

        this.net = mlp({
            ...mlpSpec,
            sizes: [observationSize, ...mlpSpec.sizes],
        })

        this.meanLayer = tf.layers.dense({
            units: actionSize,
        })

        this.stdevLayer = tf.layers.dense({
            units: actionSize,
        })

        this.gaussianLikelihood = new GaussianLikelihood()
    }

    call(x: tf.Tensor<tf.Rank>): tf.Tensor<tf.Rank>[] {
        x = this.net.apply(x) as tf.Tensor<tf.Rank>
        const mu = this.meanLayer.apply(x) as tf.Tensor<tf.Rank>

        let logSigma = this.stdevLayer.apply(x) as tf.Tensor<tf.Rank>
        logSigma = tf.clipByValue(logSigma, LOG_STD_MIN, LOG_STD_MAX)
        const sigma = tf.exp(logSigma)

        // Reparameterized sample u = mu + sigma * eps, taken before squashing
        const preTanh = tf.add(mu, tf.mul(tf.randomNormal(mu.shape), sigma))

        // Log-likelihood of the unsquashed sample under N(mu, sigma)
        let logpPi = this.gaussianLikelihood.apply([preTanh, mu, logSigma]) as tf.Tensor<tf.Rank>

        // Tanh-squashing correction: log(1 - tanh(u)^2) = 2 * (log 2 - u - softplus(-2u))
        logpPi = tf.sub(
            logpPi,
            tf.sum(
                tf.mul(
                    2,
                    tf.sub(tf.sub(Math.log(2), preTanh), tf.softplus(tf.mul(-2, preTanh))),
                ),
                1,
            ),
        )

        const action = tf.tanh(preTanh)

        return [action, logpPi]
    }

    get trainableWeights(): tf.LayerVariable[] {
        return [
            ...this.net.trainableWeights,
            ...this.meanLayer.trainableWeights,
            ...this.stdevLayer.trainableWeights,
        ]
    }

    static get className() {
        return "Actor"
    }
}

tf.serialization.registerClass(Actor)
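
A small check, not part of this commit, of the identity behind the correction term in Actor.call: for a pre-squash sample u, log(1 - tanh(u)^2) = 2 * (log 2 - u - softplus(-2u)), which is why subtracting that sum from the Gaussian log-likelihood gives the log-density of the tanh-squashed action.

import * as tf from "@tensorflow/tfjs"

const u = tf.tensor1d([-5, -1, 0, 1, 5])

// Both expressions should print (numerically) identical values
const direct = tf.log(tf.sub(1, tf.square(tf.tanh(u))))
const stable = tf.mul(2, tf.sub(tf.sub(Math.log(2), u), tf.softplus(tf.mul(-2, u))))

direct.print()
stable.print()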
32 changes: 32 additions & 0 deletions packages/learning/src/soft-actor-critic/critic.ts
@@ -0,0 +1,32 @@
import * as tf from "@tensorflow/tfjs"
import { MlpSpecification, mlp } from "./mlp"

export class Critic extends tf.layers.Layer {
    private q: tf.Sequential

    constructor(observationSize: number, actionSize: number, mlpSpec: MlpSpecification) {
        super()

        this.q = mlp({
            ...mlpSpec,
            sizes: [observationSize + actionSize, ...mlpSpec.sizes],
            outputActivation: undefined,
        })
    }

    call([obs, act]: tf.Tensor<tf.Rank>[]): tf.Tensor<tf.Rank> {
        // Q(s, a): feed the concatenated observation and action through the MLP
        // and squeeze the trailing unit dimension to get a rank-1 batch of values
        let x = tf.concat([obs, act], 1)
        x = this.q.apply(x) as tf.Tensor<tf.Rank>
        return tf.squeeze(x, [1])
    }

    get trainableWeights(): tf.LayerVariable[] {
        return this.q.trainableWeights
    }

    static get className() {
        return "Critic"
    }
}

tf.serialization.registerClass(Critic)
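
The update logic lives in soft-actor-critic.ts, which is not rendered on this page; the sketch below only illustrates how a pair of target critics typically feeds the clipped double-Q soft Bellman target in SAC. The function name and parameters are assumptions, not code from this commit.

import * as tf from "@tensorflow/tfjs"
import { Critic } from "./critic"

// Hypothetical helper: clipped double-Q soft Bellman target
// y = r + gamma * (1 - done) * (min(Q1'(s', a'), Q2'(s', a')) - alpha * log pi(a'|s'))
function sacTarget(
    q1Target: Critic,
    q2Target: Critic,
    nextObs: tf.Tensor,
    nextAction: tf.Tensor,
    nextLogpPi: tf.Tensor,
    reward: tf.Tensor,
    done: tf.Tensor,
    gamma: number,
    alpha: number,
): tf.Tensor {
    const q1 = q1Target.call([nextObs, nextAction])
    const q2 = q2Target.call([nextObs, nextAction])

    const softValue = tf.sub(tf.minimum(q1, q2), tf.mul(alpha, nextLogpPi))
    return tf.add(reward, tf.mul(gamma, tf.mul(tf.sub(1, done), softValue)))
}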
25 changes: 25 additions & 0 deletions packages/learning/src/soft-actor-critic/gaussian-likelihood.ts
@@ -0,0 +1,25 @@
import * as tf from "@tensorflow/tfjs"

export class GaussianLikelihood extends tf.layers.Layer {
    computeOutputShape(inputShape: tf.Shape[]): tf.Shape | tf.Shape[] {
        return [inputShape[0][0], 1]
    }

    call([x, mu, logstd]: tf.Tensor<tf.Rank>[]): tf.Tensor<tf.Rank> {
        // Sum of per-dimension log densities of a diagonal Gaussian:
        // -0.5 * (((x - mu) / sigma)^2 + 2 * logstd + log(2 * pi))
        const preSum = tf.mul(
            -0.5,
            tf.add(
                tf.pow(tf.div(tf.sub(x, mu), tf.exp(logstd)), 2),
                tf.add(tf.mul(2, logstd), Math.log(2 * Math.PI)),
            ),
        )

        return tf.sum(preSum, 1)
    }

    static get className() {
        return "GaussianLikelihood"
    }
}

tf.serialization.registerClass(GaussianLikelihood)
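
A quick sanity check, not part of this commit, for the formula above: with x = mu and logstd = 0, every dimension contributes -0.5 * ln(2*pi) ≈ -0.9189, so an 8-dimensional input should sum to roughly -7.35.

import * as tf from "@tensorflow/tfjs"
import { GaussianLikelihood } from "./gaussian-likelihood"

const likelihood = new GaussianLikelihood()

// Invoke the layer's call directly on concrete tensors
const x = tf.zeros([1, 8])
const mu = tf.zeros([1, 8])
const logstd = tf.zeros([1, 8])

likelihood.call([x, mu, logstd]).print() // expected: about -7.35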
(Diffs for the remaining changed files are not rendered on this page.)