Skip to content

Commit

Permalink
Continue working on sac
Browse files: browse the repository at this point in the history
  • Loading branch information
phisn committed Apr 25, 2024
1 parent 82089cd commit 64de69d
Showing 1 changed file with 11 additions and 6 deletions.
17 changes: 11 additions & 6 deletions packages/learning/src/sac/sac.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,7 +8,7 @@ function mlp(
x: tf.Tensor2D,
hiddenSizes: number[],
activation: ActivationIdentifier,
outputActivation: ActivationIdentifier,
outputActivation: ActivationIdentifier | undefined,
) {
for (const h of hiddenSizes) {
x = tf.layers
Expand Down Expand Up @@ -74,7 +74,11 @@ function mlpGaussianPolicy(
const pi = tf.add(mu, tf.mul(tf.randomNormal(std.shape), std))
const logPi = gaussianLikelihood(a, mu, logstdClipped)

return { mu, pi, logPi }
return {
mu: mu as tf.Tensor2D,
pi: pi as tf.Tensor2D,
logPi: logPi as tf.Tensor2D,
}
}

/*
Expand Down Expand Up @@ -142,13 +146,14 @@ function mlpActorCritic(
} = applySquashingFunction(mu, pi, logPi)

const actionScale = actionSpace
mu = tf.mul(mu, actionScale)
pi = tf.mul(pi, actionScale)
const muScaled = tf.mul(muSquashed, actionScale)
const piScaled = tf.mul(piSquashed, actionScale)

const vfMlp = (x: tf.Tensor2D) => tf.squeeze(mlp(x, [...hiddenSizes, 1], activation, null), 1)
const vfMlp = (x: tf.Tensor2D) =>
tf.squeeze(mlp(x, [...hiddenSizes, 1], activation, undefined), [1])

const q1 = vfMlp(tf.concat([x, a], 1))
const q2 = vfMlp(tf.concat([x, a], 1))

return { mu, pi, logPi, q1, q2 }
return { mu: muScaled, pi: piScaled, logPi: logPiSquashed, q1, q2 }
}

0 comments on commit 64de69d

Please sign in to comment.