diff --git a/packages/learning-gym/src/main.ts b/packages/learning-gym/src/main.ts
index cfbd43a5..930eac5e 100644
--- a/packages/learning-gym/src/main.ts
+++ b/packages/learning-gym/src/main.ts
@@ -1,30 +1,42 @@
 import RAPIER from "custom-rapier2d-node/rapier"
-import * as fs from "fs"
 import * as gl from "gl"
-import { PNG } from "pngjs"
+import { EntityWith, MessageCollector } from "runtime-framework"
 import { WorldModel } from "runtime/proto/world"
+import { LevelCapturedMessage } from "runtime/src/core/level-capture/level-captured-message"
+import { RuntimeComponents } from "runtime/src/core/runtime-components"
 import { RuntimeSystemContext } from "runtime/src/core/runtime-system-stack"
-import { newRuntime } from "runtime/src/runtime"
+import { Runtime, newRuntime } from "runtime/src/runtime"
 import * as THREE from "three"
 import { GameAgentWrapper } from "web-game/src/game/game-agent-wrapper"
+import { Reward, RewardFactory } from "../../web-game/src/game/reward/default-reward"
 
-interface GameEnvironmentConfig {
+export interface GameEnvironmentConfig {
     width: number
     height: number
-    fps: number
+    stepsPerFrame: number
 }
 
-class GameEnvironment {
+export class GameEnvironment {
     private observationImageBuffer: Buffer
     private observationFeatureBuffer: Buffer
     private imageBuffer: Buffer
 
-    private game: GameAgentWrapper
+    private runtime!: Runtime
+    private reward!: Reward
+    private game!: GameAgentWrapper
     private renderer: THREE.WebGLRenderer
-    private rotation: number
-
-    constructor(private config: GameEnvironmentConfig) {
+    private rotation!: number
+    private rocket!: EntityWith<RuntimeComponents, "rocket" | "rigidBody">
+    private targetFlag!: EntityWith<RuntimeComponents, "level">
+    private capturedCollector!: MessageCollector<LevelCapturedMessage>
+
+    constructor(
+        private world: WorldModel,
+        private gamemode: string,
+        private config: GameEnvironmentConfig,
+        private rewardFactory: RewardFactory,
+    ) {
         // features (4 bytes)
         // - velocity x
         // - velocity y
@@ -40,10 +52,6 @@ class GameEnvironment {
         // source image has additionally alpha channel
         this.imageBuffer = Buffer.alloc(config.width * config.height * 4)
 
-        this.game = new GameAgentWrapper(runtime, new THREE.Scene() as any)
-
-        this.rotation = 0
-
         const canvas = {
             width: config.width,
             height: config.height,
@@ -63,25 +71,43 @@ class GameEnvironment {
         const renderTarget = new THREE.WebGLRenderTarget(config.width, config.height)
 
         this.renderer.setRenderTarget(renderTarget)
+
+        this.reset()
     }
 
-    reset() {}
+    reset(): [Buffer, Buffer] {
+        this.runtime = newRuntime(RAPIER as any, this.world, this.gamemode)
+        this.game = new GameAgentWrapper(this.runtime, new THREE.Scene() as any)
+        this.rocket = this.runtime.factoryContext.store.find("rocket", "rigidBody")[0]
+        this.capturedCollector = this.runtime.factoryContext.messageStore.collect("levelCaptured")
+        this.targetFlag = nextFlag(this.runtime, this.rocket)
+        this.rotation = 0
+        this.reward = this.rewardFactory(this.runtime)
+
+        this.extractPixelsToObservationBuffer()
+        this.prepareFeatureBuffer()
+
+        return [this.observationImageBuffer, this.observationFeatureBuffer]
+    }
 
-    step(action: Buffer): [Buffer, Buffer] {
+    step(action: Buffer): [number, boolean, Buffer, Buffer] {
         const input = this.stepWithActionToInput(action.readInt8(0))
-        this.game.step(input)
-        this.game.step(input)
-        this.game.step(input)
-        this.game.step(input)
+
+        const [reward, done] = this.reward.next(() => {
+            for (let i = 0; i < this.config.stepsPerFrame; ++i) {
+                this.game.step(input)
+            }
+        })
+
         this.renderer.render(this.game.sceneModule.getScene() as any, this.game.camera as any)
         this.extractPixelsToObservationBuffer()
+        this.prepareFeatureBuffer()
 
-        return [this.observationImageBuffer, this.observationFeatureBuffer]
+        return [reward, done, this.observationImageBuffer, this.observationFeatureBuffer]
     }
 
     stepWithActionToInput(action: number): RuntimeSystemContext {
-        console.log("Action: ", action)
         switch (action) {
             case 0:
                 return { thrust: false, rotation: this.rotation }
@@ -130,10 +156,51 @@ class GameEnvironment {
             }
         }
     }
+
+    prepareFeatureBuffer() {
+        // any capture message invalidates the current target flag
+        for (const message of this.capturedCollector) {
+            this.targetFlag = nextFlag(this.runtime, this.rocket)
+        }
+
+        const dx =
+            this.rocket.components.rigidBody.translation().x -
+            this.targetFlag.components.level.flag.x
+        const dy =
+            this.rocket.components.rigidBody.translation().y -
+            this.targetFlag.components.level.flag.y
+
+        const inCapture = this.targetFlag.components.level.inCapture
+
+        this.observationFeatureBuffer.writeFloatLE(this.rocket.components.rigidBody.linvel().x, 0)
+        this.observationFeatureBuffer.writeFloatLE(this.rocket.components.rigidBody.linvel().y, 4)
+        this.observationFeatureBuffer.writeFloatLE(this.rotation, 8)
+        this.observationFeatureBuffer.writeFloatLE(dx, 12)
+        this.observationFeatureBuffer.writeFloatLE(dy, 16)
+        this.observationFeatureBuffer.writeFloatLE(inCapture ? 1 : 0, 20)
+    }
+}
+
+function nextFlag(
+    runtime: Runtime,
+    rocket: EntityWith<RuntimeComponents, "rocket" | "rigidBody">,
+) {
+    const distanceToFlag = (flagEntity: EntityWith<RuntimeComponents, "level">) => {
+        const dx = rocket.components.rigidBody.translation().x - flagEntity.components.level.flag.x
+        const dy = rocket.components.rigidBody.translation().y - flagEntity.components.level.flag.y
+        return Math.sqrt(dx * dx + dy * dy)
+    }
+
+    const nextLevel = runtime.factoryContext.store
+        .find("level")
+        .filter(level => !level.components.level.captured)
+        .map(level => [level, distanceToFlag(level)] as const)
+        .reduce(([minLevel, minDistance], [level, distance]) =>
+            distance < minDistance ? [level, distance] : [minLevel, minDistance],
+        )[0]
+
+    return nextLevel
+}
 
 global.navigator = { userAgent: "node" } as any
 
+/*
 const worldStr2 =
     "ClwKBkdsb2JhbBJSEigNzcxUwBXJdsBBJQAA7MEtAADKQTUAAO5BPQAAmMBFAAAAQE0AAABAGiYKJAAANEEAAEA/AAD/AODPAACAgP8AAABAxMDA/wDgTwC0////AAo1CgJGMRIvEi0NMzMbQBWLbFdAHdsPyUAlAADswS0AALhANQAA7kE9AACYwEUAAABATQAAAEAKEgoCRzESDAoKDWZmDsEVZmbEQQoSCgJHMhIMCgoNZmYKwRVmZsJBChIKAkczEgwKCg1mZma/FWZmwkEKEgoCRzQSDAoKDWZmRkAVZmbEQQo1CgJGMhIvEi0NzcwywRWLbFdAHdsPyUAlAACawS0AAMpBNQAAIEE9AACYwEUAAABATQAAAEASHAoITm9ybWFsIDESEAoCRzEKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDISEAoCRzIKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDMSEAoCRzMKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDQSEAoCRzQKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDUSEAoCRjIKAkcxCgZHbG9iYWwSHAoITm9ybWFsIDYSEAoCRjIKAkcyCgZHbG9iYWwSHAoITm9ybWFsIDcSEAoCRjIKAkczCgZHbG9iYWwSHAoITm9ybWFsIDgSEAoCRzQKAkYyCgZHbG9iYWw="
 
 const worldStr =
@@ -141,22 +208,27 @@ const worldStr =
 const worldStr3 =
     "CscCCgZOb3JtYWwSvAIKCg2F65XBFTXTGkISKA2kcLrBFZfjFkIlAAAAwi1SuIlCNa5H+UE9H4X/QUUAAABATQAAAEASKA1SuMFBFZmRGkIlhetRQS3NzFJCNSlcp0I9zcxEQUUAAABATQAAAEASKA0AgEVCFfIboEElAAAoQi0K189BNaRw4UI9rkdZwUUAAABATQAAAEASKA171MBCFcubHcElmpm5Qi0K189BNY/CI0M9rkdZwUUAAABATQAAAEASLQ1syOFCFToytkEdVGuzOiWamblCLSlcZUI1XI8jQz3NzIhBRQAAAEBNAAAAQBItDR/lAUMVk9VNQh2fUDa1JaRw9UItexRsQjWF60FDPQAAlEFFAAAAQE0AAABAEigNw1UzQxVpqkFCJdejJEMtBW94QjXXo0JDPQVvAEJFAAAAQE0AAABACu4KCg1Ob3JtYWwgU2hhcGVzEtwKGt8GCtwGP4UAws3MNEGgEEAAZjYAAP///wB1PAAU////AF5PABT///8AyUtPxP///wAzSg3L////AMBJAcj///8AE0Umzf///wCMVAo5////AJNRpDr///8AVE0WVP///wD0vlZLAAD/AEPI7Bn///8AhcPlOAAA/wAFQZrF////ADS9F8f///8AJMIuwf///wC5xvvF////AOrJ1rf///8Ac8ikQP///wBAxfRF////AGkxi0n///8Aj0LxQgAA/wB1xWY9////AJ/HZAlQUP4AzcUBvQAA/wDwQFzE////ADDGR73///8As8eZPoiI8QBxxWQ3rKz/AFw3LMQAAP8AwkNRtP///wC2RKO4////AEhBe8EAAP8AS0WPPP///wAdSaSx////AMw/Ucj///8A7MBNxv///wDmxnG9////AELCFLr///8Aw8UOof///wAKxCg4AAD/ALg8OMDZ2fsA4j9NwP///wCkxB+/AADwAHGwrr54ePgAVERcwv///wAPwXbA////APW0H0EAAPgASLtnv////wALM67DJSX/AFJApL////8AZj4uwP///wBcu+HATU3/AIU7+8H///8AXMK8Lf///wB7wjM/AAD4AHDCx8D///8AFEH7wP///wAAvnvE////AOTGChL///8A6bncRP///wCAQddAAAD4AB/AxLH///8AIL9RPQAA+ACZwqvG////AOLCLkQAAPgAIcTrwP///wDtwQPH////AOLJbqz///8ALsR6QwAA+AD+x8zA////APtF90kyMv8AH7mZQCcn/wCNxHo8tbX/AIDAiETKyv8AXEAgSgAA+AClyAqS////AH9EG0n///8AS0ypRP///wAxSIK7MDToANjBdUf///8A58yjxP///wCByD1EMDToAIzCYMv///8AnMq3MzA06AC+QenF////ANzGT0T///8AtMFSR////wBzRb85lpj/AFJALEQwNOgAqMIpPjA06AAgyiCF////AAPEE77///8AzT4FSnN1/wAzxWFCMDToAA23PcKXl/8AGcLmQDA06ADMPUnJu77/AFrGxsL///8A1TRGSjA06ACKwik8MDToAE3Apcn///8Ar8SawP///wBsygqP////ABHI8z0wNOgAAABTzv///wAa9wMK9APNzJNCj8JlQP///wBmtly8////ABa2jsg2Nv8AO0SENwAA+ACkvrtEvLz/AG0uOEX///8A4UaHPv///wA+QlXFAAD4AApB2L4AAPgAeDLVRP///wATSHHAAAD4ADhA3EP///8As0MKvAAA8ADOPxM4AAD4AEjBTUD///8Arj5TP3B0+ACyKw9DaGz4ALm6eDz///8AKT4MSP///wDhPy5CAAD/APS/XEL///8A+EV6PwAA/wAdsXtBp6f/AGzEpEEAAP8AisfEuf///wDXwVJI////AJpEaUf///8AhUfxQP///wB7RA3FAAD/ANdBTzUAAP8AC8C9Rv///wBGQoVE////APRMpDz///8A7kS3yAAA/wDLR9HB////AFLHNscAAP8AR0HNwf///wDsvtLGAAD/AABE5kD///8AD0JIRv///wD0RNJA////AEVFqcD///8A3ESpwwAA/wAuwgtJ////AARBqEj///8ALUdbSf///wA01Hks////AHjCAL3///8AF8s5x////wC4vlPP////AME1O8f///8AhsIAPgAA+ABcxZXC7e3/AIrEpUMAAPgAjcbDxcvL/wBdQFzF////AEjI+8EAAOAAQ0GZvf///wAGN77AFRX/APlFXDz///8AikEzwkhI+ADcQmoy////AArNAgoHUmV2ZXJzZRLBAgoPDRydLkMVk5lFQh2z7Zk2EigNpHC6wRWX4xZCJQAAAMItAABMQjUAAEDBPR+F/0FFAAAAQE0AAABAEigNUrjBQRWZkRpCJR+FAMItZuaJQjUAAPpBPQAAAEJFAAAAQE0AAABAEigNAIBFQhXyG6BBJQAAUEEthetRQjWkcKdCPVK4TkFFAAAAQE0AAABAEigNe9TAQhXLmx3BJTQzKEItCtfPQTUeBeJCPa5HWcFFAAAAQE0AAABAEi0NbMjhQhU6MrZBHVRrszolmpm5Qi1SuNRBNVyPI0M9ZmZawUUAAABATQAAAEASLQ0f5QFDFZPVTUIdn1A2tSWk8LlCLXsUZUI1hSskQz0AAIZBRQAAAEBNAAAAQBIoDcNVM0MVaapBQiUAgPVCLQAAbEI1AABCQz0AAJRBRQAAAEBNAAAAQBIhCgZOb3JtYWwSFwoNTm9ybWFsIFNoYXBlcwoGTm9ybWFsEiMKB1JldmVyc2USGAoNTm9ybWFsIFNoYXBlcwoHUmV2ZXJzZQ=="
 
-const world = WorldModel.decode(Buffer.from(worldStr3, "base64"))
-const runtime = newRuntime(RAPIER as any, world, "Normal")
+const world = WorldModel.decode(Buffer.from(worldStr, "base64"))
 
-const env = new GameEnvironment({
-    width: 64,
-    height: 64,
-    fps: 60,
-})
+const env = new GameEnvironment(
+    world,
+    "Normal",
+    {
+        width: 64,
+        height: 64,
+        stepsPerFrame: 4,
+    },
+    game => new DefaultGameReward(game),
+)
 
 const png = new PNG({
     width: 64,
     height: 64,
 })
 
-for (let i = 0; i < 10; ++i) {
-    const [image] = env.step(Buffer.from([5]))
+for (let i = 0; i < 30; ++i) {
+    const [r, , image] = env.step(Buffer.from([5]))
+    console.log(r)
     png.data.set(image)
     fs.writeFileSync(
         `imgs/output${i}.png`,
@@ -167,3 +239,4 @@ for (let i = 0; i < 10; ++i) {
 fs.writeFileSync("output.png", PNG.sync.write(png, { colorType: 2, inputHasAlpha: false }))
 
 process.exit(0)
+*/
diff --git a/packages/learning/package.json b/packages/learning/package.json
index e1a070a9..63a35be8 100644
--- a/packages/learning/package.json
+++ b/packages/learning/package.json
@@ -21,12 +21,14 @@
     "dependencies": {
         "@dimforge/rapier2d": "^0.12.0",
         "@tensorflow/tfjs": "^4.19.0",
-        "@tensorflow/tfjs-backend-webgl": "^4.19.0-rc.0",
-        "@tensorflow/tfjs-backend-webgpu": "^4.19.0-rc.0",
+        "@tensorflow/tfjs-backend-webgl": "^4.19.0",
+        "@tensorflow/tfjs-backend-webgpu": "^4.19.0",
+        "@tensorflow/tfjs-node": "^4.19.0",
         "@types/prompts": "^2.4.9",
         "@types/sat": "^0.0.35",
         "@types/three": "^0.164.0",
         "eslint-config-custom": "*",
+        "learning-gym": "*",
         "lil-gui": "^0.19.2",
         "poly-decomp-es": "^0.4.2",
         "ppo-tfjs": "^0.0.2",
diff --git a/packages/learning/src/alt-main.ts b/packages/learning/src/alt-main.ts
new file mode 100644
index 00000000..b6e72b96
--- /dev/null
+++ b/packages/learning/src/alt-main.ts
@@ -0,0 +1,906 @@
+import * as tf from "@tensorflow/tfjs"
+import { Buffer } from "buffer"
+import { EntityWith, MessageCollector } from "runtime-framework"
+import { WorldModel } from "runtime/proto/world"
+import { LevelCapturedMessage } from "runtime/src/core/level-capture/level-captured-message"
+import { RocketDeathMessage } from "runtime/src/core/rocket/rocket-death-message"
+import { RuntimeComponents } from "runtime/src/core/runtime-components"
+import { Runtime, newRuntime } from "runtime/src/runtime"
+import { Environment, PPO } from "./ppo/ppo"
+
+export class PolyburnEnvironment implements Environment {
+    private runtime: Runtime
+    private currentRotation: number
+    private nearestLevel: EntityWith<RuntimeComponents, "level">
+
+    private captureMessages: MessageCollector<LevelCapturedMessage>
+    private deathMessages: MessageCollector<RocketDeathMessage>
+
+    private bestDistance: number
+    private maxTime = 60 * 30
+    private remainingTime = 60 * 30
+
+    private worldModel: any
+
+    private touchedFlag = false
+
+    constructor() {
+        const worldStr2 =
+            "ClwKBkdsb2JhbBJSEigNzcxUwBXJdsBBJQAA7MEtAADKQTUAAO5BPQAAmMBFAAAAQE0AAABAGiYKJAAANEEAAEA/AAD/AODPAACAgP8AAABAxMDA/wDgTwC0////AAo1CgJGMRIvEi0NMzMbQBWLbFdAHdsPyUAlAADswS0AALhANQAA7kE9AACYwEUAAABATQAAAEAKEgoCRzESDAoKDWZmDsEVZmbEQQoSCgJHMhIMCgoNZmYKwRVmZsJBChIKAkczEgwKCg1mZma/FWZmwkEKEgoCRzQSDAoKDWZmRkAVZmbEQQo1CgJGMhIvEi0NzcwywRWLbFdAHdsPyUAlAACawS0AAMpBNQAAIEE9AACYwEUAAABATQAAAEASHAoITm9ybWFsIDESEAoCRzEKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDISEAoCRzIKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDMSEAoCRzMKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDQSEAoCRzQKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDUSEAoCRjIKAkcxCgZHbG9iYWwSHAoITm9ybWFsIDYSEAoCRjIKAkcyCgZHbG9iYWwSHAoITm9ybWFsIDcSEAoCRjIKAkczCgZHbG9iYWwSHAoITm9ybWFsIDgSEAoCRzQKAkYyCgZHbG9iYWw="
+
+        const worldStr =
+            "CqAJCgZOb3JtYWwSlQkKDw0fhZ3BFR+FB0Id2w/JQBItDR+FtsEVgZUDQh3bD8lAJQAAEMItpHBhQjWuR9lBPR+Fm0FFAAAAQE0AAABAEi0Nrkc/QRVt5wZCHdsPyUAlAAD4QC2kcBZCNezRjUI94KMwP0UAAABATQAAAEASLQ2k8B5CFX9qWEEd2w/JQCUAAP5BLaRwFkI17NG9Qj3gozA/RQAAAEBNAAAAQBItDeyRm0IVPzWGQR3bD8lAJQCAjUItSOHsQTX26AVDPYTr6cBFAAAAQE0AAABAEi0Nw0XwQhUcd4lAHTMeejwlAIDnQi2kcA5CNfboMkM9EK6nv0UAAABATQAAAEASLQ2PYhxDFT813EEd2w/JQCUAAM9CLaRwbEI1AMAmQz0fhbFBRQAAAEBNAAAAQBItDcM15UIVYxBJQh3bD8lAJQAAeUItUrijQjXs0fpCPZDCM0JFAAAAQE0AAABAEi0N9WiFQhXVeIhCHdsPyUAlw7WBQi3sUY9CNcO1kUI9AACBQkUAAABATQAAAEAaTgpMpHA9wXE9ukHAwP8AAEAAPYCA/wAAtIBDAAD/AIDFAEBAQP8AgMgAAICA/wBAxgC+oKD/AABGAMf///8AV0dxQry8+QBSQPHA////ABpOCkyuR3FBSOHKQf/++ABAxgAA//3wAAA/QMT/++AAQEoAQv/3wAAAPkBF/++AAADHAD//3gAAgMYAAP/vgAAAAIDD////AKxGCq////8AGpcCCpQC9qjBQpqZJEL///8AMNEAOv///wDqy9pH////AOzHNML///8AAMIAx////wAAQkDE////AABFAL3///8AAELAx////wCARgBF////AEBGgMb///8AwEYAv////wAgSQBF////AOBIgMP///8A4EjAR////wAARYDE////AAC+oMj///8AAD8AAP///wAAAODK////AGBJAEf///8AwMTASP///wAgSQAA////AEBEwMb///8AAEOAQ////wBASQC/////AAA+wEj///8AwEqAw////wAAvMBL////AODIAAD///8AQMoAQP///wAAPgBI////ACDIAAD///8AgMCARv///wCAyQAA////AEBFgMb///8AGqcCCqQCpHAZQqRwOcH///8AmFgAwP///wCAxwhU////AGDK4E3///8AwM1gyf///wAAv+DI////AKBLAMP///8AADpgyf///wCARgAA////AAA6YMv///8AQMgAAP///wAAvuDJ////AIBFYMj///8AQMyAwf///wAAtMDG////AGDLAL3///8AOMAMSP///wAkxgCu////AADC4Mj///8AAMNARv///wBgyQAA////AEDHgMP///8AwMeAQf///wAAAEBM////ACDJAAD///8AgMMAx////wAAyoBC////AAC9AMb///8AgMTARf///wCAwIDB////AABFAML///8AAMgANP///wBAxEBG////AADHAAD///8AAMFAyP///wBgyEDE////ABomCiSPQopCcT2DQv/AjQAAxAAA/+R0AAAAAMT/kwAAAEQAAP+bAAASEgoGTm9ybWFsEggKBk5vcm1hbA=="
+
+        this.worldModel = WorldModel.decode(Buffer.from(worldStr, "base64"))
+
+        this.runtime = newRuntime(this.worldModel, "Normal")
+
+        this.currentRotation = 0
+
+        const rocket = this.runtime.factoryContext.store.find("rocket", "rigidBody")[0]
+        const rocketPosition = rocket.components.rigidBody.translation()
+
+        this.captureMessages = this.runtime.factoryContext.messageStore.collect("levelCaptured")
+        this.deathMessages = this.runtime.factoryContext.messageStore.collect("rocketDeath")
+
+        this.nearestLevel = this.runtime.factoryContext.store
+            .find("level")
+            .filter(level => level.components.level.captured === false)
+            .sort(
+                (a, b) =>
+                    Math.abs(a.components.level.flag.x - rocketPosition.x) -
+                    Math.abs(b.components.level.flag.x - rocketPosition.x),
+            )[0]
+
+        const { distance } = this.state()
+        this.bestDistance = distance
+    }
+
+    step(action: number | number[]): [number[], number, boolean] {
+        if (Array.isArray(action)) {
+            action = action[0]
+        }
+
+        this.remainingTime--
+        let thrust = false
+
+        switch (action) {
+            case 0:
+                break
+            case 1:
+                thrust = true
+                break
+            case 2:
+                this.currentRotation += 0.05
+                break
+            case 3:
+                thrust = true
+                this.currentRotation += 0.05
+                break
+            case 4:
+                this.currentRotation -= 0.05
+                break
+            case 5:
+                thrust = true
+                this.currentRotation -= 0.05
+                break
+            default:
+                throw new Error("Wrong action")
+        }
+
+        this.runtime.step({
+            thrust,
+            rotation: this.currentRotation,
+        })
+
+        const { distance, observation, velMag, angDiff } = this.state()
+
+        let newTouch = false
+
+        if (this.nearestLevel.components.level.inCapture) {
+            if (!this.touchedFlag) {
+                newTouch = true
+            }
+
+            this.touchedFlag = true
+        }
+
+        const captureMessage = [...this.captureMessages].at(-1)
+
+        if (captureMessage) {
+            const reward = 10000 + (this.maxTime - this.remainingTime) * 100
+            return [observation, reward, true]
+        }
+
+        const deathMessage = [...this.deathMessages].at(-1)
+
+        if (deathMessage) {
+            const reward = -velMag * 10 - angDiff * 10
+            return [observation, reward, true]
+        }
+
+        if (this.remainingTime <= 0) {
+            return [observation, -3000, true]
+        }
+
+        const reward = Math.max(0, this.bestDistance - distance)
+        this.bestDistance = Math.min(this.bestDistance, distance)
+
+        const done = this.remainingTime <= 0
+
+        return [
+            observation,
+            reward * 10 + (newTouch ? 100 : 0) + 100 * (this.touchedFlag ? 1 : 0),
+            done,
+        ]
+    }
+
+    state() {
+        const rocket = this.runtime.factoryContext.store.find("rocket", "rigidBody")[0]
+
+        const rocketPosition = rocket.components.rigidBody.translation()
+        const rocketRotation = rocket.components.rigidBody.rotation()
+        const rocketVelocity = rocket.components.rigidBody.linvel()
+
+        const dx = this.nearestLevel.components.level.flag.x - rocketPosition.x
+        const dy = this.nearestLevel.components.level.flag.y - rocketPosition.y
+
+        const distanceToLevel = Math.sqrt(dx * dx + dy * dy)
+
+        const angDiff =
+            (this.nearestLevel.components.level.flagRotation -
+                rocket.components.rigidBody.rotation()) %
+            (Math.PI * 2)
+
+        const velMag = Math.sqrt(
+            rocketVelocity.x * rocketVelocity.x + rocketVelocity.y * rocketVelocity.y,
+        )
+
+        return {
+            distance: distanceToLevel,
+            observation: [
+                this.nearestLevel.components.level.flag.x - rocketPosition.x,
+                this.nearestLevel.components.level.flag.y - rocketPosition.y,
+                rocketRotation,
+                rocketVelocity.x,
+                rocketVelocity.y,
+            ],
+            touched: this.touchedFlag,
+            angDiff,
+            velMag,
+        }
+    }
+
+    reset(): number[] {
+        this.runtime = newRuntime(this.worldModel, "Normal")
+
+        this.currentRotation = 0
+
+        const rocket = this.runtime.factoryContext.store.find("rocket", "rigidBody")[0]
+        const rocketPosition = rocket.components.rigidBody.translation()
+
+        this.captureMessages = this.runtime.factoryContext.messageStore.collect("levelCaptured")
+        this.deathMessages = this.runtime.factoryContext.messageStore.collect("rocketDeath")
+
+        this.nearestLevel = this.runtime.factoryContext.store
+            .find("level")
+            .filter(level => level.components.level.captured === false)
+            .sort(
+                (a, b) =>
+                    Math.abs(a.components.level.flag.x - rocketPosition.x) -
+                    Math.abs(b.components.level.flag.x - rocketPosition.x),
+            )[0]
+
+        const { distance, observation } = this.state()
+
+        this.bestDistance = distance
+        this.remainingTime = this.maxTime
+        this.touchedFlag = false
+
+        return observation
+    }
+}
+
+export class CartPole implements Environment {
+    private gravity: number
+    private massCart: number
+    private massPole: number
+    private totalMass: number
+    private cartWidth: number
+    private cartHeight: number
+    private length: number
+    private poleMoment: number
+    private forceMag: number
+    private tau: number
+
+    private xThreshold: number
+    private thetaThreshold: number
+
+    private x: number = 0
+    private xDot: number = 0
+    private theta: number = 0
+    private thetaDot: number = 0
+
+    /**
+     * Constructor of CartPole.
+     */
+    constructor() {
+        // Constants that characterize the system.
+        this.gravity = 9.8
+        this.massCart = 1.0
+        this.massPole = 0.1
+        this.totalMass = this.massCart + this.massPole
+        this.cartWidth = 0.2
+        this.cartHeight = 0.1
+        this.length = 0.5
+        this.poleMoment = this.massPole * this.length
+        this.forceMag = 10.0
+        this.tau = 0.02 // Seconds between state updates.
+
+        // Threshold values, beyond which a simulation will be marked as failed.
+        this.xThreshold = 2.4
+        this.thetaThreshold = (12 / 360) * 2 * Math.PI
+
+        this.reset()
+    }
+
+    /**
+     * Get current state as a tf.Tensor of shape [1, 4].
+     */
+    getStateTensor() {
+        return [this.x, this.xDot, this.theta, this.thetaDot]
+    }
+
+    private i = 0
+    private max = 0
+
+    /**
+     * Update the cart-pole system using an action.
+     * @param {number} action Only the sign of `action` matters.
+     *   A value > 0 leads to a rightward force of a fixed magnitude.
+     *   A value <= 0 leads to a leftward force of the same fixed magnitude.
+     */
+    step(action: number | number[]): [number[], number, boolean] {
+        if (Array.isArray(action)) {
+            action = action[0]
+        }
+
+        const force = action * this.forceMag
+
+        const cosTheta = Math.cos(this.theta)
+        const sinTheta = Math.sin(this.theta)
+        ++this.i
+
+        const temp =
+            (force + this.poleMoment * this.thetaDot * this.thetaDot * sinTheta) / this.totalMass
+        const thetaAcc =
+            (this.gravity * sinTheta - cosTheta * temp) /
+            (this.length * (4 / 3 - (this.massPole * cosTheta * cosTheta) / this.totalMass))
+        const xAcc = temp - (this.poleMoment * thetaAcc * cosTheta) / this.totalMass
+
+        // Update the four state variables, using Euler's method.
+        this.x += this.tau * this.xDot
+        this.xDot += this.tau * xAcc
+        this.theta += this.tau * this.thetaDot
+        this.thetaDot += this.tau * thetaAcc
+
+        let reward = 0
+
+        if (this.isDone()) {
+            reward = -100
+        } else {
+            reward = 1
+        }
+
+        return [this.getStateTensor(), reward, this.isDone()]
+    }
+
+    /**
+     * Set the state of the cart-pole system randomly.
+     */
+    reset() {
+        this.i = 0
+        // The control-theory state variables of the cart-pole system.
+        // Cart position, meters.
+        this.x = Math.random() - 0.5
+        // Cart velocity.
+        this.xDot = (Math.random() - 0.5) * 1
+        // Pole angle, radians.
+        this.theta = (Math.random() - 0.5) * 2 * ((6 / 360) * 2 * Math.PI)
+        // Pole angle velocity.
+        this.thetaDot = (Math.random() - 0.5) * 0.5
+
+        return this.getStateTensor()
+    }
+
+    /**
+     * Determine whether this simulation is done.
+     *
+     * A simulation is done when `x` (position of the cart) goes out of bound
+     * or when `theta` (angle of the pole) goes out of bound.
+     *
+     * @returns {bool} Whether the simulation is done.
+     */
+    isDone() {
+        return (
+            this.x < -this.xThreshold ||
+            this.x > this.xThreshold ||
+            this.theta < -this.thetaThreshold ||
+            this.theta > this.thetaThreshold
+        )
+    }
+}
+
+import "@tensorflow/tfjs-backend-webgl"
+import "@tensorflow/tfjs-backend-webgpu"
+import { SoftActorCritic } from "./soft-actor-critic/soft-actor-critic"
+
+if (false) {
+    tf.setBackend("webgl").then(() => {
+        // const env = new PolyburnEnvironment()
+        const env = new CartPole()
+        const envTest = new CartPole()
+
+        const sac = new SoftActorCritic({
+            mlpSpec: {
+                sizes: [32, 32],
+                activation: "relu",
+                outputActivation: "relu",
+            },
+
+            actionSize: 1,
+            observationSize: 4,
+
+            maxEpisodeLength: 1000,
+            bufferSize: 10_000,
+            batchSize: 128,
+            updateAfter: 1000,
+            updateEvery: 50,
+
+            learningRate: 1e-3,
+            alpha: 0.2,
+            gamma: 0.99,
+            polyak: 0.995,
+        })
+
+        /*
+        function possibleLifetime() {
+            let observation = env.reset()
+
+            let totalReward = 0
+            const inputs = []
+
+            while (true) {
+                const action = sac.act(observation, true)
+                inputs.push(env.inputFromAction(action as number[]))
+
+                const [nextObservation, reward, done] = env.step(action)
+
+                totalReward += reward
+                observation = nextObservation
+
+                if (done) {
+                    break
+                }
+            }
+
+            return {
+                totalReward,
+                touched: env.state().touched,
+                distance: env.state().distance,
+                inputs,
+            }
+        }
+
+        sac.learn(env, {
+            stepsPerEpoch: 100,
+            epochs: 1000,
+            onFirstEpisodeInEpoch() {
+                const lt = possibleLifetime()
+                console.log(
+                    `Reward: ${lt.totalReward}, touched: ${lt.touched}, distance: ${lt.distance}`,
+                )
+            },
+        })
+        */
+
+        sac.learn(env, {
+            stepsPerEpoch: 100,
+            epochs: 1000,
+            onEpochFinish() {
+                let observation = envTest.reset()
+                let t = 0
+
+                while (t < 1000) {
+                    const action = sac.act(observation, true)
+                    const [nextObservation, , done] = envTest.step(action)
+
+                    if (done) {
+                        break
+                    }
+
+                    observation = nextObservation
+                    t++
+                }
+
+                console.log("Length: ", t)
+            },
+        })
+
+        function iteration() {
+            requestAnimationFrame(iteration)
+        }
+
+        requestAnimationFrame(iteration)
+
+        return
+        fetch("http://localhost:5173/batches.json")
+            .then(r =>
+                r
+                    .json()
+                    .then(j => {
+                        const batches = JSON.parse(j)
+                        let i = 0
+
+                        function currentReward() {
+                            const acc = []
+
+                            for (let j = 0; j < 100; ++j) {
+                                env.reset()
+
+                                let x = 0
+
+                                while (!env.isDone() && x < 1000) {
+                                    env.step(sac.act(env.getStateTensor(), true))
+                                    x++
+                                }
+
+                                acc.push(x)
+                            }
+
+                            // average of top 10% lifetimes
+                            acc.sort((a, b) => b - a)
+
+                            const best10avg = acc.slice(0, 10).reduce((a, b) => a + b, 0) / 10
+                            const worst10avg = acc.slice(-10).reduce((a, b) => a + b, 0) / 10
+                            const avg = acc.reduce((a, b) => a + b, 0) / acc.length
+
+                            return { avg, best10avg, worst10avg }
+                        }
+
+                        for (const batch of batches) {
+                            sac.update({
+                                observation: tf.tensor2d(batch.observation),
+                                action: tf.tensor2d(batch.action),
+                                reward: tf.tensor1d(batch.reward),
+                                nextObservation: tf.tensor2d(batch.nextObservation),
+                                done: tf.tensor1d(batch.done),
+                            })
+
+                            console.log(`Batch ${i++} done`)
+                        }
+
+                        console.log("Reward: ", currentReward())
+
+                        console.log("Done")
+                    })
+                    .catch(e => {
+                        console.error(e)
+                    }),
+            )
+            .catch(e => {
+                console.error(e)
+            })
+
+        /*
+        const actor = new Actor(4, 2, {
+            sizes: [32, 32],
+            activation: "relu",
+            outputActivation: "relu",
+        })
+
+        actor.trainableWeights.forEach(w => {
+            w.write(tf.zeros(w.shape, w.dtype))
+        })
+
+        /*
+        x = torch.tensor([[0.1, 0.2, 0.3, 0.4]], dtype=torch.float32)
+        x = actor(x, True)
+
+        const x = tf.tensor2d([[0.1, 0.2, 0.3, 0.4]])
+        const r = actor.apply(x, { deterministic: true }) as tf.Tensor[]
+
+        console.log(r[0].dataSync())
+        console.log(r[1].dataSync())
+        */
+    })
+}
+
+if (false) {
+    tf.setBackend("cpu").then(() => {
+        const env = new CartPole()
+
+        const sac = new SoftActorCritic({
+            mlpSpec: {
+                sizes: [32, 32],
+                activation: "relu",
+                outputActivation: "relu",
+            },
+
+            actionSize: 1,
+            observationSize: 4,
+
+            maxEpisodeLength: 1000,
+            bufferSize: 1e6,
+            batchSize: 100,
+            updateAfter: 1000,
+            updateEvery: 50,
+
+            learningRate: 1e-3,
+            alpha: 0.2,
+            gamma: 0.99,
+            polyak: 0.995,
+        })
+
+        function currentReward() {
+            const acc = []
+
+            for (let j = 0; j < 10; ++j) {
+                env.reset()
+
+                let x = 0
+
+                while (!env.isDone() && x < 1000) {
+                    env.step(sac.act(env.getStateTensor(), false))
+                    x++
+                }
+
+                acc.push(x)
+            }
+
+            // average of top 10% lifetimes
+            acc.sort((a, b) => b - a)
+
+            const best10avg = acc.slice(0, 10).reduce((a, b) => a + b, 0) / 10
+            const worst10avg = acc.slice(-10).reduce((a, b) => a + b, 0) / 10
+            const avg = acc.reduce((a, b) => a + b, 0) / acc.length
+
+            return { avg, best10avg, worst10avg }
+        }
+
+        let t = 0
+        let updated = false
+
+        function iteration() {
+            for (let i = 0; i < 16; ++i) {
+                t++
+
+                const observation = env.getStateTensor()
+
+                let action: number[]
+
+                if (t < 10_000) {
+                    action = [Math.random() * 2 - 1]
+                } else {
+                    action = sac.act(observation, false)
+                }
+
+                const [nextObservation, reward, done] = env.step(action)
+
+                const thisTimeUpdated = sac.observe({
+                    observation,
+                    action,
+                    reward,
+                    nextObservation,
+                    done,
+                })
+
+                updated ||= thisTimeUpdated
+
+                if (done) {
+                    if (updated) {
+                        const { avg, best10avg, worst10avg } = currentReward()
+
+                        console.log(`Leaks: ${tf.memory().numTensors}`)
+                        console.log(`10%: ${best10avg}, 90%: ${worst10avg}, avg: ${avg}`)
+                    }
+
+                    env.reset()
+
+                    updated = false
+                }
+            }
+
+            requestAnimationFrame(iteration)
+        }
+
+        console.log("Start")
+        requestAnimationFrame(iteration)
+
+        /*
+        const ppo = new PPO(
+            {
+                steps: 512,
+                epochs: 15,
+                policyLearningRate: 1e-3,
+                valueLearningRate: 1e-3,
+                clipRatio: 0.1,
+                targetKL: 0.01,
+                gamma: 0.99,
+                lambda: 0.95,
+                observationDimension: 4,
+                actionSpace: {
+                    class: "Discrete",
+                    len: 2,
+                },
+            },
+            env,
+            tf.sequential({
+                layers: [
+                    tf.layers.dense({
+                        inputDim: 4,
+                        units: 32,
+                        activation: "relu",
+                    }),
+                    tf.layers.dense({
+                        units: 32,
+                        activation: "relu",
+                    }),
+                ],
+            }),
+            tf.sequential({
+                layers: [
+                    tf.layers.dense({
+                        inputDim: 4,
+                        units: 32,
+                        activation: "relu",
+                    }),
+                    tf.layers.dense({
+                        units: 32,
+                        activation: "relu",
+                    }),
+                ],
+            }),
+        )
+
+        function possibleLifetime() {
+            const acc = []
+
+            for (let j = 0; j < 25; ++j) {
+                env.reset()
+
+                let t = 0
+
+                while (!env.isDone() && t < 1000) {
+                    env.step(ppo.act(env.getStateTensor()) as number[])
+                    t++
+                }
+
+                acc.push(t)
+            }
+
+            // average of top 10% lifetimes
+            acc.sort((a, b) => b - a)
+
+            const best10avg = acc.slice(0, 10).reduce((a, b) => a + b, 0) / 10
+            const worst10avg = acc.slice(-10).reduce((a, b) => a + b, 0) / 10
+            const avg = acc.reduce((a, b) => a + b, 0) / acc.length
+
+            return { avg, best10avg, worst10avg }
+        }
+
+        let currentAverage = 0
+        let i = 0
+
+        function iteration() {
+            ppo.learn(512 * i)
+
+            const { avg, best10avg, worst10avg } = possibleLifetime()
+
+            console.log(`Leaks: ${tf.memory().numTensors}`)
+            console.log(`10%: ${best10avg}, 90%: ${worst10avg}, avg: ${avg}`)
+
+            if (avg > currentAverage) {
+                // await ppo.save()
+                currentAverage = avg
+                console.log("Saved")
+            }
+
+            i++
+
+            requestAnimationFrame(iteration)
+        }
+
+        console.log("Initial: ", possibleLifetime())
+
+        console.log("Start")
+        requestAnimationFrame(iteration)
+
+        */
+    })
+}
+
+if (true) {
+    tf.setBackend("webgl").then(() => {
+        const env = new PolyburnEnvironment()
+
+        class SplitLayer extends tf.layers.Layer {
+            constructor(private left: number) {
+                super({})
+            }
+
+            call(inputs: tf.Tensor): tf.Tensor[] {
+                const len = inputs.shape[1]
+
+                if (len === undefined) {
+                    throw new Error("Input is too short")
+                }
+
+                return tf.split(inputs, [this.left, len - this.left], 1)
+            }
+        }
+
+        function model() {
+            const featureCount = 6
+
+            const width = 64
+            const height = 64
+
+            const input = tf.input({ shape: [width * height * 3 + featureCount] })
+
+            let [addedFeatures, image] = new SplitLayer(featureCount).apply(
+                input,
+            ) as tf.SymbolicTensor[]
+
+            image = tf.layers
+                .conv2d({
+                    filters: 16,
+                    kernelSize: 8,
+                    strides: 4,
+                    activation: "relu",
+                })
+                .apply(image) as tf.SymbolicTensor
+
+            image = tf.layers
+                .conv2d({
+                    filters: 32,
+                    kernelSize: 4,
+                    strides: 2,
+                    activation: "relu",
+                })
+                .apply(image) as tf.SymbolicTensor
+
+            image = tf.layers
+                .conv2d({
+                    filters: 32,
+                    kernelSize: 3,
+                    strides: 1,
+                    activation: "relu",
+                })
+                .apply(image) as tf.SymbolicTensor
+
+            const imageFlat = tf.layers.flatten().apply(image)
+
+            const imageReduced = tf.layers
+                .dense({ units: 256 })
+                .apply(imageFlat) as tf.SymbolicTensor
+
+            let features = tf.layers.concatenate().apply([imageReduced, addedFeatures])
+
+            features = tf.layers
+                .dense({ units: 256, activation: "relu" })
+                .apply(features) as tf.SymbolicTensor
+
+            features = tf.layers
+                .dense({ units: 64, activation: "relu" })
+                .apply(features) as tf.SymbolicTensor
+
+            return tf.model({ inputs: input, outputs: features })
+        }
+
+        const ppo = new PPO(
+            {
+                steps: 512,
+                epochs: 20,
+                policyLearningRate: 1e-4,
+                valueLearningRate: 1e-4,
+                clipRatio: 0.2,
+                targetKL: 0.01,
+                gamma: 0.99,
+                lambda: 0.95,
+                observationDimension: 64 * 64 * 3 + 6,
+                actionSpace: {
+                    class: "Discrete",
+                    len: 6,
+                },
+            },
+            env,
+            model(),
+            model(),
+        )
+
+        function possibleLifetime() {
+            let observation = env.reset()
+
+            let totalReward = 0
+            const inputs: number[] = []
+
+            while (true) {
+                const action = ppo.act(observation)
+                const [nextObservation, reward, done] = env.step(action)
+
+                totalReward += reward
+                observation = nextObservation
+
+                if (done) {
+                    break
+                }
+            }
+
+            return {
+                totalReward,
+                touched: env.state().touched,
+                distance: env.state().distance,
+                inputs,
+            }
+        }
+
+        /*
+        function iteration() {
+
+            const info = possibleLifetime()
+
+            console.log(
+                `Reward ${i}: reward(${info.totalReward}), distance(${info.distance}), touched(${info.touched})`,
+            )
+
+            if (info.totalReward > currentAverage && previousTwenty.length === 20) {
+                currentAverage = info.totalReward
+                console.log("Saved")
+            }
+
+            if (previousTwenty.length === 20) {
+                previousTwenty.shift()
+            }
+
+            previousTwenty.push(info.totalReward)
+
+            const avgPreviousTwenty =
+                previousTwenty.reduce((a, b) => a + b, 0) / previousTwenty.length
+
+            ++i
+
+            if (
+                avgPreviousTwenty < 50 &&
+                avgPreviousTwenty < Math.max(currentAverage, 10) * 0.5 &&
+                previousTwenty.length === 20
+            ) {
+                console.log("Restoring")
+
+                requestAnimationFrame(iteration)
+            } else {
+                requestAnimationFrame(iteration)
+            }
+        }
+
+        const { totalReward, inputs } = possibleLifetime()
+        currentAverage = totalReward
+
+        console.log(JSON.stringify(inputs))
+
+        console.log("Start with: ", currentAverage)
+        requestAnimationFrame(iteration)
+
+        /*
+        ppo.restore().finally(() => {
+            const { totalReward, inputs } = possibleLifetime()
+            currentAverage = totalReward
+
+            console.log(JSON.stringify(inputs))
+
+            console.log("Start with: ", currentAverage)
+            requestAnimationFrame(iteration)
+        })
+        */
+    })
+}
diff --git a/packages/learning/src/main.ts b/packages/learning/src/main.ts
index 2d00e62d..07adab2e 100644
--- a/packages/learning/src/main.ts
+++ b/packages/learning/src/main.ts
@@ -1,908 +1,157 @@
-import * as tf from "@tensorflow/tfjs"
-import { Buffer } from "buffer"
-import { EntityWith, MessageCollector } from "runtime-framework"
+import * as tf from "@tensorflow/tfjs-node"
+import { GameEnvironment } from "learning-gym/src/main"
 import { WorldModel } from "runtime/proto/world"
-import { LevelCapturedMessage } from "runtime/src/core/level-capture/level-captured-message"
-import { RocketDeathMessage } from "runtime/src/core/rocket/rocket-death-message"
-import { RuntimeComponents } from "runtime/src/core/runtime-components"
-import { Runtime, newRuntime } from "runtime/src/runtime"
-import { Environment } from "./ppo/ppo"
+import { DefaultGameReward } from "web-game/src/game/reward/default-reward"
+import { Environment, PPO } from "./ppo/ppo"
 
-export class PolyburnEnvironment implements Environment {
-    private runtime: Runtime
-    private currentRotation: number
-    private nearestLevel: EntityWith<RuntimeComponents, "level">
-
-    private captureMessages: MessageCollector<LevelCapturedMessage>
-    private deathMessages: MessageCollector<RocketDeathMessage>
-
-    private bestDistance: number
-    private maxTime = 60 * 30
-    private remainingTime = 60 * 30
-
-    private worldModel: any
-
-    private touchedFlag = false
-
-    constructor() {
-        const worldStr2 =
-            "ClwKBkdsb2JhbBJSEigNzcxUwBXJdsBBJQAA7MEtAADKQTUAAO5BPQAAmMBFAAAAQE0AAABAGiYKJAAANEEAAEA/AAD/AODPAACAgP8AAABAxMDA/wDgTwC0////AAo1CgJGMRIvEi0NMzMbQBWLbFdAHdsPyUAlAADswS0AALhANQAA7kE9AACYwEUAAABATQAAAEAKEgoCRzESDAoKDWZmDsEVZmbEQQoSCgJHMhIMCgoNZmYKwRVmZsJBChIKAkczEgwKCg1mZma/FWZmwkEKEgoCRzQSDAoKDWZmRkAVZmbEQQo1CgJGMhIvEi0NzcwywRWLbFdAHdsPyUAlAACawS0AAMpBNQAAIEE9AACYwEUAAABATQAAAEASHAoITm9ybWFsIDESEAoCRzEKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDISEAoCRzIKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDMSEAoCRzMKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDQSEAoCRzQKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDUSEAoCRjIKAkcxCgZHbG9iYWwSHAoITm9ybWFsIDYSEAoCRjIKAkcyCgZHbG9iYWwSHAoITm9ybWFsIDcSEAoCRjIKAkczCgZHbG9iYWwSHAoITm9ybWFsIDgSEAoCRzQKAkYyCgZHbG9iYWw="
-
-        const worldStr =
-            "CqAJCgZOb3JtYWwSlQkKDw0fhZ3BFR+FB0Id2w/JQBItDR+FtsEVgZUDQh3bD8lAJQAAEMItpHBhQjWuR9lBPR+Fm0FFAAAAQE0AAABAEi0Nrkc/QRVt5wZCHdsPyUAlAAD4QC2kcBZCNezRjUI94KMwP0UAAABATQAAAEASLQ2k8B5CFX9qWEEd2w/JQCUAAP5BLaRwFkI17NG9Qj3gozA/RQAAAEBNAAAAQBItDeyRm0IVPzWGQR3bD8lAJQCAjUItSOHsQTX26AVDPYTr6cBFAAAAQE0AAABAEi0Nw0XwQhUcd4lAHTMeejwlAIDnQi2kcA5CNfboMkM9EK6nv0UAAABATQAAAEASLQ2PYhxDFT813EEd2w/JQCUAAM9CLaRwbEI1AMAmQz0fhbFBRQAAAEBNAAAAQBItDcM15UIVYxBJQh3bD8lAJQAAeUItUrijQjXs0fpCPZDCM0JFAAAAQE0AAABAEi0N9WiFQhXVeIhCHdsPyUAlw7WBQi3sUY9CNcO1kUI9AACBQkUAAABATQAAAEAaTgpMpHA9wXE9ukHAwP8AAEAAPYCA/wAAtIBDAAD/AIDFAEBAQP8AgMgAAICA/wBAxgC+oKD/AABGAMf///8AV0dxQry8+QBSQPHA////ABpOCkyuR3FBSOHKQf/++ABAxgAA//3wAAA/QMT/++AAQEoAQv/3wAAAPkBF/++AAADHAD//3gAAgMYAAP/vgAAAAIDD////AKxGCq////8AGpcCCpQC9qjBQpqZJEL///8AMNEAOv///wDqy9pH////AOzHNML///8AAMIAx////wAAQkDE////AABFAL3///8AAELAx////wCARgBF////AEBGgMb///8AwEYAv////wAgSQBF////AOBIgMP///8A4EjAR////wAARYDE////AAC+oMj///8AAD8AAP///wAAAODK////AGBJAEf///8AwMTASP///wAgSQAA////AEBEwMb///8AAEOAQ////wBASQC/////AAA+wEj///8AwEqAw////wAAvMBL////AODIAAD///8AQMoAQP///wAAPgBI////ACDIAAD///8AgMCARv///wCAyQAA////AEBFgMb///8AGqcCCqQCpHAZQqRwOcH///8AmFgAwP///wCAxwhU////AGDK4E3///8AwM1gyf///wAAv+DI////AKBLAMP///8AADpgyf///wCARgAA////AAA6YMv///8AQMgAAP///wAAvuDJ////AIBFYMj///8AQMyAwf///wAAtMDG////AGDLAL3///8AOMAMSP///wAkxgCu////AADC4Mj///8AAMNARv///wBgyQAA////AEDHgMP///8AwMeAQf///wAAAEBM////ACDJAAD///8AgMMAx////wAAyoBC////AAC9AMb///8AgMTARf///wCAwIDB////AABFAML///8AAMgANP///wBAxEBG////AADHAAD///8AAMFAyP///wBgyEDE////ABomCiSPQopCcT2DQv/AjQAAxAAA/+R0AAAAAMT/kwAAAEQAAP+bAAASEgoGTm9ybWFsEggKBk5vcm1hbA=="
-
-        this.worldModel = WorldModel.decode(Buffer.from(worldStr, "base64"))
-
-        this.runtime = newRuntime(this.worldModel, "Normal")
-
-        this.currentRotation = 0
-
-        const rocket = this.runtime.factoryContext.store.find("rocket", "rigidBody")[0]
-        const rocketPosition = rocket.components.rigidBody.translation()
-
-        this.captureMessages = this.runtime.factoryContext.messageStore.collect("levelCaptured")
-        this.deathMessages = this.runtime.factoryContext.messageStore.collect("rocketDeath")
-
-        this.nearestLevel = this.runtime.factoryContext.store
-            .find("level")
-            .filter(level => level.components.level.captured === false)
-            .sort(
-                (a, b) =>
-                    Math.abs(a.components.level.flag.x - rocketPosition.x) -
-                    Math.abs(b.components.level.flag.y - rocketPosition.x),
-            )[0]
-
-        const { distance } = this.state()
-        this.bestDistance = distance
+class SplitLayer extends tf.layers.Layer {
+    computeOutputShape(inputShape: tf.Shape[]): tf.Shape[] {
+        return [inputShape[0], inputShape[0]]
     }
 
-    step(action: number | number[]): [number[], number, boolean] {
-        if (Array.isArray(action)) {
-            action = action[0]
-        }
-
-        this.remainingTime--
-        let thrust = false
-
-        switch (action) {
-            case 0:
-                break
-            case 1:
-                thrust = true
-                break
-            case 2:
-                this.currentRotation += 0.05
-                break
-            case 3:
-                thrust = true
-                this.currentRotation += 0.05
-                break
-            case 4:
-                this.currentRotation -= 0.05
-                break
-            case 5:
-                thrust = true
-                this.currentRotation -= 0.05
-                break
-            default:
-                throw new Error("Wrong action")
-        }
-
-        this.runtime.step({
-            thrust,
-            rotation: this.currentRotation,
-        })
-
-        const { distance, observation, velMag, angDiff } = this.state()
-
-        let newTouch = false
-
-        if (this.nearestLevel.components.level.inCapture) {
-            if (!this.touchedFlag) {
-                newTouch = true
-            }
-
-            this.touchedFlag = true
-        }
-
-        const captureMessage = [...this.captureMessages].at(-1)
-
-        if (captureMessage) {
-            const reward = 10000 + (this.maxTime - this.remainingTime) * 100
-            return [observation, reward, true]
-        }
+    constructor(private left: number) {
+        super()
+    }
 
-        const deathMessage = [...this.deathMessages].at(-1)
+    call(inputs: tf.Tensor): tf.Tensor[] {
+        console.error("inputs shape: ", inputs.shape)
 
-        if (deathMessage) {
-            const reward = -velMag * 10 - angDiff * 10
-            return [observation, reward, true]
-        }
+        const len = inputs.shape[1]
 
-        if (this.remainingTime <= 0) {
-            return [observation, -3000, true]
+        if (len === undefined) {
+            throw new Error("Input is too short")
         }
 
-        const reward = Math.max(0, this.bestDistance - distance)
-        this.bestDistance = Math.min(this.bestDistance, distance)
+        console.log("inputs shape: ", inputs.shape)
 
-        const done = this.remainingTime <= 0
-
-        return [
-            observation,
-            reward * 10 + (newTouch ? 100 : 0) + 100 * (this.touchedFlag ? 1 : 0),
-            done,
-        ]
+        return tf.split(inputs, [this.left, len - this.left], 1)
     }
 
-    state() {
-        const rocket = this.runtime.factoryContext.store.find("rocket", "rigidBody")[0]
-
-        const rocketPosition = rocket.components.rigidBody.translation()
-        const rocketRotation = rocket.components.rigidBody.rotation()
-        const rocketVelocity = rocket.components.rigidBody.linvel()
-
-        const dx = this.nearestLevel.components.level.flag.x - rocketPosition.x
-        const dy = this.nearestLevel.components.level.flag.y - rocketPosition.y
-
-        const distanceToLevel = Math.sqrt(dx * dx + dy * dy)
-
-        const angDiff =
-            (this.nearestLevel.components.level.flagRotation -
-                rocket.components.rigidBody.rotation()) %
-            (Math.PI * 2)
-
-        const velMag = Math.sqrt(
-            rocketVelocity.x * rocketVelocity.x + rocketVelocity.y * rocketVelocity.y,
-        )
-
-        return {
-            distance: distanceToLevel,
-            observation: [
-                this.nearestLevel.components.level.flag.x - rocketPosition.x,
-                this.nearestLevel.components.level.flag.y - rocketPosition.y,
-                rocketRotation,
-                rocketVelocity.x,
-                rocketVelocity.y,
-            ],
-            touched: this.touchedFlag,
-            angDiff,
-            velMag,
-        }
+    static get className() {
+        return "SplitLayer"
     }
+}
 
-    reset(): number[] {
-        this.runtime = newRuntime(this.worldModel, "Normal")
-
-        this.currentRotation = 0
-
-        const rocket = this.runtime.factoryContext.store.find("rocket", "rigidBody")[0]
-        const rocketPosition = rocket.components.rigidBody.translation()
-
-        this.captureMessages = this.runtime.factoryContext.messageStore.collect("levelCaptured")
-        this.deathMessages = this.runtime.factoryContext.messageStore.collect("rocketDeath")
-
-        this.nearestLevel = this.runtime.factoryContext.store
-            .find("level")
-            .filter(level => level.components.level.captured === false)
-            .sort(
-                (a, b) =>
-                    Math.abs(a.components.level.flag.x - rocketPosition.x) -
-                    Math.abs(b.components.level.flag.y - rocketPosition.x),
-            )[0]
-
-        const { distance, observation } = this.state()
+const worldStr2 =
+    "ClwKBkdsb2JhbBJSEigNzcxUwBXJdsBBJQAA7MEtAADKQTUAAO5BPQAAmMBFAAAAQE0AAABAGiYKJAAANEEAAEA/AAD/AODPAACAgP8AAABAxMDA/wDgTwC0////AAo1CgJGMRIvEi0NMzMbQBWLbFdAHdsPyUAlAADswS0AALhANQAA7kE9AACYwEUAAABATQAAAEAKEgoCRzESDAoKDWZmDsEVZmbEQQoSCgJHMhIMCgoNZmYKwRVmZsJBChIKAkczEgwKCg1mZma/FWZmwkEKEgoCRzQSDAoKDWZmRkAVZmbEQQo1CgJGMhIvEi0NzcwywRWLbFdAHdsPyUAlAACawS0AAMpBNQAAIEE9AACYwEUAAABATQAAAEASHAoITm9ybWFsIDESEAoCRzEKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDISEAoCRzIKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDMSEAoCRzMKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDQSEAoCRzQKAkYxCgZHbG9iYWwSHAoITm9ybWFsIDUSEAoCRjIKAkcxCgZHbG9iYWwSHAoITm9ybWFsIDYSEAoCRjIKAkcyCgZHbG9iYWwSHAoITm9ybWFsIDcSEAoCRjIKAkczCgZHbG9iYWwSHAoITm9ybWFsIDgSEAoCRzQKAkYyCgZHbG9iYWw="
 
-        this.bestDistance = distance
-        this.remainingTime = this.maxTime
-        this.touchedFlag = false
+const world = WorldModel.decode(Buffer.from(worldStr2, "base64"))
 
-        return observation
-    }
-}
+const env = new GameEnvironment(
+    world,
+    "Normal 1",
+    {
+        stepsPerFrame: 4,
+        width: 64,
+        height: 64,
+    },
+    g => new DefaultGameReward(g),
+)
 
-export class CartPole implements Environment {
-    private gravity: number
-    private massCart: number
-    private massPole: number
-    private totalMass: number
-    private cartWidth: number
-    private cartHeight: number
-    private length: number
-    private poleMoment: number
-    private forceMag: number
-    private tau: number
-
-    private xThreshold: number
-    private thetaThreshold: number
-
-    private x: number = 0
-    private xDot: number = 0
-    private theta: number = 0
-    private thetaDot: number = 0
-
-    /**
-     * Constructor of CartPole.
-     */
-    constructor() {
-        // Constants that characterize the system.
-        this.gravity = 9.8
-        this.massCart = 1.0
-        this.massPole = 0.1
-        this.totalMass = this.massCart + this.massPole
-        this.cartWidth = 0.2
-        this.cartHeight = 0.1
-        this.length = 0.5
-        this.poleMoment = this.massPole * this.length
-        this.forceMag = 10.0
-        this.tau = 0.02 // Seconds between state updates.
-
-        // Threshold values, beyond which a simulation will be marked as failed.
-        this.xThreshold = 2.4
-        this.thetaThreshold = (12 / 360) * 2 * Math.PI
-
-        this.reset()
-    }
+const inputBuffer = Buffer.alloc(1)
 
-    /**
-     * Get current state as a tf.Tensor of shape [1, 4].
-     */
-    getStateTensor() {
-        return [this.x, this.xDot, this.theta, this.thetaDot]
-    }
+const envWrapped: Environment = {
+    reset: () => {
+        const [image, addedFeatures] = env.reset()
 
-    private i = 0
-    private max = 0
+        const imageArray = Array.from(image)
+        const addedFeaturesArray = Array.from(addedFeatures)
 
-    /**
-     * Update the cart-pole system using an action.
-     * @param {number} action Only the sign of `action` matters.
-     *   A value > 0 leads to a rightward force of a fixed magnitude.
-     *   A value <= 0 leads to a leftward force of the same fixed magnitude.
-     */
-    step(action: number | number[]): [number[], number, boolean] {
+        return addedFeaturesArray.concat(imageArray)
+    },
+    step: (action: number | number[]) => {
         if (Array.isArray(action)) {
             action = action[0]
         }
 
-        const force = action * this.forceMag
-
-        const cosTheta = Math.cos(this.theta)
-        const sinTheta = Math.sin(this.theta)
-        ++this.i
-
-        const temp =
-            (force + this.poleMoment * this.thetaDot * this.thetaDot * sinTheta) / this.totalMass
-        const thetaAcc =
-            (this.gravity * sinTheta - cosTheta * temp) /
-            (this.length * (4 / 3 - (this.massPole * cosTheta * cosTheta) / this.totalMass))
-        const xAcc = temp - (this.poleMoment * thetaAcc * cosTheta) / this.totalMass
-
-        // Update the four state variables, using Euler's method.
-        this.x += this.tau * this.xDot
-        this.xDot += this.tau * xAcc
-        this.theta += this.tau * this.thetaDot
-        this.thetaDot += this.tau * thetaAcc
-
-        let reward = 0
-
-        if (this.isDone()) {
-            reward = -100
-        } else {
-            reward = 1
-        }
-
-        return [this.getStateTensor(), reward, this.isDone()]
-    }
+        inputBuffer.writeUInt8(action, 0)
+        const [reward, done, image, addedFeatures] = env.step(inputBuffer)
 
-    /**
-     * Set the state of the cart-pole system randomly.
-     */
-    reset() {
-        this.i = 0
-        // The control-theory state variables of the cart-pole system.
-        // Cart position, meters.
-        this.x = Math.random() - 0.5
-        // Cart velocity.
-        this.xDot = (Math.random() - 0.5) * 1
-        // Pole angle, radians.
-        this.theta = (Math.random() - 0.5) * 2 * ((6 / 360) * 2 * Math.PI)
-        // Pole angle velocity.
-        this.thetaDot = (Math.random() - 0.5) * 0.5
-
-        return this.getStateTensor()
-    }
+        const imageArray = Array.from(image)
+        const addedFeaturesArray = Array.from(addedFeatures)
 
-    /**
-     * Determine whether this simulation is done.
-     *
-     * A simulation is done when `x` (position of the cart) goes out of bound
-     * or when `theta` (angle of the pole) goes out of bound.
-     *
-     * @returns {bool} Whether the simulation is done.
-     */
-    isDone() {
-        return (
-            this.x < -this.xThreshold ||
-            this.x > this.xThreshold ||
-            this.theta < -this.thetaThreshold ||
-            this.theta > this.thetaThreshold
-        )
-    }
+        return [addedFeaturesArray.concat(imageArray), reward, done]
+    },
 }
 
-import "@tensorflow/tfjs-backend-webgl"
-import "@tensorflow/tfjs-backend-webgpu"
-import { DQN } from "./dqn/dqn"
-import { SoftActorCritic } from "./soft-actor-critic/soft-actor-critic"
-
-if (false) {
-    tf.setBackend("webgl").then(() => {
-        // const env = new PolyburnEnvironment()
-        const env = new CartPole()
-        const envTest = new CartPole()
-
-        const sac = new SoftActorCritic({
-            mlpSpec: {
-                sizes: [32, 32],
-                activation: "relu",
-                outputActivation: "relu",
-            },
-
-            actionSize: 1,
-            observationSize: 4,
-
-            maxEpisodeLength: 1000,
-            bufferSize: 10_000,
-            batchSize: 128,
-            updateAfter: 1000,
-            updateEvery: 50,
-
-            learningRate: 1e-3,
-            alpha: 0.2,
-            gamma: 0.99,
-            polyak: 0.995,
-        })
-
-        /*
-        function possibleLifetime() {
-            let observation = env.reset()
-
-            let totalReward = 0
-            const inputs = []
-
-            while (true) {
-                const action = sac.act(observation, true)
-                inputs.push(env.inputFromAction(action as number[]))
-
-                const [nextObservation, reward, done] = env.step(action)
-
-                totalReward += reward
-                observation = nextObservation
-
-                if (done) {
-                    break
-                }
-            }
-
-            return {
-                totalReward,
-                touched: env.state().touched,
-                distance: env.state().distance,
-                inputs,
-            }
-        }
-
-        sac.learn(env, {
-            stepsPerEpoch: 100,
-            epochs: 1000,
-            onFirstEpisodeInEpoch() {
-                const lt = possibleLifetime()
-                console.log(
-                    `Reward: ${lt.totalReward}, touched: ${lt.touched}, distance: ${lt.distance}`,
-                )
-            },
-        })
-        */
-
-        sac.learn(env, {
-            stepsPerEpoch: 100,
-            epochs: 1000,
-            onEpochFinish() {
-                let observation = envTest.reset()
-                let t = 0
+function model() {
+    const featureCount = 6
 
-                while (t < 1000) {
-                    const action = sac.act(observation, true)
-                    const [nextObservation, , done] = envTest.step(action)
+    const width = 64
+    const height = 64
 
-                    if (done) {
-                        break
-                    }
+    const input = tf.input({ shape: [width * height * 3 + featureCount] })
 
-                    observation = nextObservation
-                    t++
-                }
+    const splitLayer = new SplitLayer(featureCount)
+    const x = splitLayer.apply(input)
+    console.log("x: ", x)
+    let [addedFeatures, image] = x as tf.SymbolicTensor[]
 
-                console.log("Length: ", t)
-            },
-        })
-
-        function iteration() {
-            requestAnimationFrame(iteration)
-        }
-
-        requestAnimationFrame(iteration)
-
-        return
-        fetch("http://localhost:5173/batches.json")
-            .then(r =>
-                r
-                    .json()
-                    .then(j => {
-                        const batches = JSON.parse(j)
-                        let i = 0
-
-                        function currentReward() {
-                            const acc = []
-
-                            for (let j = 0; j < 100; ++j) {
-                                env.reset()
-
-                                let x = 0
-
-                                while (!env.isDone() && x < 1000) {
-                                    env.step(sac.act(env.getStateTensor(), true))
-                                    x++
-                                }
-
-                                acc.push(x)
-                            }
-
-                            // average of top 10% lifetimes
-                            acc.sort((a, b) => b - a)
-
-                            const best10avg = acc.slice(0, 10).reduce((a, b) => a + b, 0) / 10
-                            const worst10avg = acc.slice(-10).reduce((a, b) => a + b, 0) / 10
-                            const avg = acc.reduce((a, b) => a + b, 0) / acc.length
-
-                            return { avg, best10avg, worst10avg }
-                        }
-
-                        for (const batch of batches) {
-                            sac.update({
-                                observation: tf.tensor2d(batch.observation),
-                                action: tf.tensor2d(batch.action),
-                                reward: tf.tensor1d(batch.reward),
-                                nextObservation: tf.tensor2d(batch.nextObservation),
-                                done: tf.tensor1d(batch.done),
-                            })
-
-                            console.log(`Batch ${i++} done`)
-                        }
-
-                        console.log("Reward: ", currentReward())
-
-                        console.log("Done")
-                    })
-                    .catch(e => {
-                        console.error(e)
-                    }),
-            )
-            .catch(e => {
-                console.error(e)
-            })
-
-        /*
-        const actor = new Actor(4, 2, {
-            sizes: [32, 32],
+    image = tf.layers
+        .conv2d({
+            filters: 16,
+            kernelSize: 8,
+            strides: 4,
             activation: "relu",
-            outputActivation: "relu",
         })
+        .apply(image) as tf.SymbolicTensor
 
-        actor.trainableWeights.forEach(w => {
-            w.write(tf.zeros(w.shape, w.dtype))
+    image = tf.layers
+        .conv2d({
+            filters: 32,
+            kernelSize: 4,
+            strides: 2,
+            activation: "relu",
         })
+        .apply(image) as tf.SymbolicTensor
 
-        /*
-        x = torch.tensor([[0.1, 0.2, 0.3, 0.4]], dtype=torch.float32)
-        x = actor(x, True)
-
-        const x = tf.tensor2d([[0.1, 0.2, 0.3, 0.4]])
-        const r = actor.apply(x, { deterministic: true }) as tf.Tensor[]
-
-        console.log(r[0].dataSync())
-        console.log(r[1].dataSync())
-        */
-    })
-}
-
-if (false) {
-    tf.setBackend("cpu").then(() => {
-        const env = new CartPole()
-
-        const sac = new SoftActorCritic({
-            mlpSpec: {
-                sizes: [32, 32],
-                activation: "relu",
-                outputActivation: "relu",
-            },
-
-            actionSize: 1,
-            observationSize: 4,
-
-            maxEpisodeLength: 1000,
-            bufferSize: 1e6,
-            batchSize: 100,
-            updateAfter: 1000,
-            updateEvery: 50,
-
-            learningRate: 1e-3,
-            alpha: 0.2,
-            gamma: 0.99,
-            polyak: 0.995,
+    image = tf.layers
+        .conv2d({
+            filters: 32,
+            kernelSize: 3,
+            strides: 1,
+            activation: "relu",
         })
+        .apply(image) as tf.SymbolicTensor
 
-        function currentReward() {
-            const acc = []
-
-            for (let j = 0; j < 10; ++j) {
-                env.reset()
-
-                let x = 0
-
-                while (!env.isDone() && x < 1000) {
-                    env.step(sac.act(env.getStateTensor(), false))
-                    x++
-                }
-
-                acc.push(x)
-            }
-
-            // average of top 10% lifetimes
-            acc.sort((a, b) => b - a)
-
-            const best10avg = acc.slice(0, 10).reduce((a, b) => a + b, 0) / 10
-            const worst10avg = acc.slice(-10).reduce((a, b) => a + b, 0) / 10
-            const avg = acc.reduce((a, b) => a + b, 0) / acc.length
-
-            return { avg, best10avg, worst10avg }
-        }
-
-        let t = 0
-        let updated = false
-
-        function iteration() {
-            for (let i = 0; i < 16; ++i) {
-                t++
-
-                const observation = env.getStateTensor()
-
-                let action: number[]
-
-                if (t < 10_000) {
-                    action = [Math.random() * 2 - 1]
-                } else {
-                    action = sac.act(observation, false)
-                }
-
-                const [nextObservation, reward, done] = env.step(action)
-
-                const thisTimeUpdated = sac.observe({
-                    observation,
-                    action,
-                    reward,
-                    nextObservation,
-                    done,
-                })
-
-                updated ||= thisTimeUpdated
-
-                if (done) {
-                    if (updated) {
-                        const { avg, best10avg, worst10avg } = currentReward()
-
-                        console.log(`Leaks: ${tf.memory().numTensors}`)
-                        console.log(`10%: ${best10avg}, 90%: ${worst10avg}, avg: ${avg}`)
-                    }
-
-                    env.reset()
-
-                    updated = false
-                }
-            }
-
-            requestAnimationFrame(iteration)
-        }
-
-        console.log("Start")
-        requestAnimationFrame(iteration)
-
-        /*
-        const ppo = new PPO(
-            {
-                steps: 512,
-                epochs: 15,
-                policyLearningRate: 1e-3,
-                valueLearningRate: 1e-3,
-                clipRatio: 0.1,
-                targetKL: 0.01,
-                gamma: 0.99,
-                lambda: 0.95,
-                observationDimension: 4,
-                actionSpace: {
-                    class: "Discrete",
-                    len: 2,
-                },
-            },
-            env,
-            tf.sequential({
-                layers: [
-                    tf.layers.dense({
- inputDim: 4, - units: 32, - activation: "relu", - }), - tf.layers.dense({ - units: 32, - activation: "relu", - }), - ], - }), - tf.sequential({ - layers: [ - tf.layers.dense({ - inputDim: 4, - units: 32, - activation: "relu", - }), - tf.layers.dense({ - units: 32, - activation: "relu", - }), - ], - }), - ) - - function possibleLifetime() { - const acc = [] - - for (let j = 0; j < 25; ++j) { - env.reset() - - let t = 0 - - while (!env.isDone() && t < 1000) { - env.step(ppo.act(env.getStateTensor()) as number[]) - t++ - } - - acc.push(t) - } - - // average of top 10% lifetimes - acc.sort((a, b) => b - a) - - const best10avg = acc.slice(0, 10).reduce((a, b) => a + b, 0) / 10 - const worst10avg = acc.slice(-10).reduce((a, b) => a + b, 0) / 10 - const avg = acc.reduce((a, b) => a + b, 0) / acc.length - - return { avg, best10avg, worst10avg } - } - - let currentAverage = 0 - let i = 0 - - function iteration() { - ppo.learn(512 * i) + const imageFlat = tf.layers.flatten().apply(image) - const { avg, best10avg, worst10avg } = possibleLifetime() + const imageReduced = tf.layers.dense({ units: 256 }).apply(imageFlat) as tf.SymbolicTensor - console.log(`Leaks: ${tf.memory().numTensors}`) - console.log(`10%: ${best10avg}, 90%: ${worst10avg}, avg: ${avg}`) - - if (avg > currentAverage) { - // await ppo.save() - currentAverage = avg - console.log("Saved") - } - - i++ - - requestAnimationFrame(iteration) - } + let features = tf.layers.concatenate().apply([imageReduced, addedFeatures]) - console.log("Initial: ", possibleLifetime()) + features = tf.layers + .dense({ units: 256, activation: "relu" }) + .apply(features) as tf.SymbolicTensor - console.log("Start") - requestAnimationFrame(iteration) + features = tf.layers + .dense({ units: 64, activation: "relu" }) + .apply(features) as tf.SymbolicTensor - */ - }) + return tf.model({ inputs: input, outputs: features }) } -if (true) { - tf.setBackend("webgl").then(() => { - const env = new PolyburnEnvironment() - - const inputDim = 5 - - const dqn = new DQN( - { - actionDim: 6, - observationDim: inputDim, - - learningRate: 1e-4, - bufferSize: 1e6, - gamma: 0.99, - tau: 0.001, - targetNetworkFrequency: 5, - batchSize: 128, - - learningStartFrom: 10000, - trainingFrequency: 10, - }, - [128, 128], - ) - - /* - const ppo = new PPO( - { - steps: 512, - epochs: 20, - policyLearningRate: 1e-4, - valueLearningRate: 1e-4, - clipRatio: 0.2, - targetKL: 0.01, - gamma: 0.99, - lambda: 0.95, - observationDimension: inputDim, - actionSpace: { - class: "Discrete", - len: 6, - }, - }, - env, - tf.sequential({ - layers: [ - tf.layers.dense({ - inputDim: inputDim, - units: 128, - activation: "relu", - }), - tf.layers.dense({ - units: 128, - activation: "relu", - }), - tf.layers.dense({ - units: 128, - activation: "relu", - }), - ], - }), - tf.sequential({ - layers: [ - tf.layers.dense({ - inputDim: inputDim, - units: 128, - activation: "relu", - }), - tf.layers.dense({ - units: 128, - activation: "relu", - }), - tf.layers.dense({ - units: 128, - activation: "relu", - }), - ], - }), - ) - */ - - function possibleLifetime() { - let observation = env.reset() - - let totalReward = 0 - const inputs: number[] = [] - - while (true) { - const action = dqn.act(observation) - const [nextObservation, reward, done] = env.step(action) - - totalReward += reward - observation = nextObservation - - if (done) { - break - } - } - - return { - totalReward, - touched: env.state().touched, - distance: env.state().distance, - inputs, - } - } - - dqn.learn(env, { - stepsPerEpoch: 100, - epochs: 
1000, - startEpsilon: 0.5, - endEpsilon: 0.01, - explorationStepsFraction: 0.1, - onTest() { - const info = possibleLifetime() - - console.log( - `Reward ${0}: reward(${info.totalReward}), distance(${info.distance}), touched(${info.touched})`, - ) - }, - }) - - function iteration() { - requestAnimationFrame(iteration) - } - - requestAnimationFrame(iteration) - - /* - function iteration() { - - const info = possibleLifetime() - - console.log( - `Reward ${i}: reward(${info.totalReward}), distance(${info.distance}), touched(${info.touched})`, - ) - - if (info.totalReward > currentAverage && previousTwenty.length === 20) { - currentAverage = info.totalReward - console.log("Saved") - } - - if (previousTwenty.length === 20) { - previousTwenty.shift() - } - - previousTwenty.push(info.totalReward) - - const avgPreviousTwenty = - previousTwenty.reduce((a, b) => a + b, 0) / previousTwenty.length - - ++i - - if ( - avgPreviousTwenty < 50 && - avgPreviousTwenty < Math.max(currentAverage, 10) * 0.5 && - previousTwenty.length === 20 - ) { - console.log("Restoring") - - requestAnimationFrame(iteration) - } else { - requestAnimationFrame(iteration) - } - } - - const { totalReward, inputs } = possibleLifetime() - currentAverage = totalReward - - console.log(JSON.stringify(inputs)) - - console.log("Start with: ", currentAverage) - requestAnimationFrame(iteration) - - /* - ppo.restore().finally(() => { - const { totalReward, inputs } = possibleLifetime() - currentAverage = totalReward - - console.log(JSON.stringify(inputs)) - - console.log("Start with: ", currentAverage) - requestAnimationFrame(iteration) - }) - */ - }) -} +const ppo = new PPO( + { + steps: 512, + epochs: 20, + policyLearningRate: 1e-4, + valueLearningRate: 1e-4, + clipRatio: 0.2, + targetKL: 0.01, + gamma: 0.99, + lambda: 0.95, + observationDimension: 64 * 64 * 3 + 6, + actionSpace: { + class: "Discrete", + len: 6, + }, + }, + envWrapped, + model(), + model(), +) + +ppo.learn(100) + +while (true) {} diff --git a/packages/learning/src/ppo/base-ppo.ts b/packages/learning/src/ppo/base-ppo.ts deleted file mode 100644 index b7d9bf2b..00000000 --- a/packages/learning/src/ppo/base-ppo.ts +++ /dev/null @@ -1,580 +0,0 @@ -// Check if node -if (typeof module === "object" && module.exports) { - var tf = require("@tensorflow/tfjs") -} - -function log() { - console.log("[PPO]", ...arguments) -} - -class BaseCallback { - constructor() { - this.nCalls = 0 - } - - _onStep(alg) { - return true - } - onStep(alg) { - this.nCalls += 1 - return this._onStep(alg) - } - - _onTrainingStart(alg) {} - onTrainingStart(alg) { - this._onTrainingStart(alg) - } - - _onTrainingEnd(alg) {} - onTrainingEnd(alg) { - this._onTrainingEnd(alg) - } - - _onRolloutStart(alg) {} - onRolloutStart(alg) { - this._onRolloutStart(alg) - } - - _onRolloutEnd(alg) {} - onRolloutEnd(alg) { - this._onRolloutEnd(alg) - } -} - -class FunctionalCallback extends BaseCallback { - constructor(callback) { - super() - this.callback = callback - } - - _onStep(alg) { - if (this.callback) { - return this.callback(alg) - } - return true - } -} - -class DictCallback extends BaseCallback { - constructor(callback) { - super() - this.callback = callback - } - - _onStep(alg) { - if (this.callback && this.callback.onStep) { - return this.callback.onStep(alg) - } - return true - } - - _onTrainingStart(alg) { - if (this.callback && this.callback.onTrainingStart) { - this.callback.onTrainingStart(alg) - } - } - - _onTrainingEnd(alg) { - if (this.callback && this.callback.onTrainingEnd) { - 
-            this.callback.onTrainingEnd(alg)
-        }
-    }
-
-    _onRolloutStart(alg) {
-        if (this.callback && this.callback.onRolloutStart) {
-            this.callback.onRolloutStart(alg)
-        }
-    }
-
-    _onRolloutEnd(alg) {
-        if (this.callback && this.callback.onRolloutEnd) {
-            this.callback.onRolloutEnd(alg)
-        }
-    }
-}
-
-class Buffer {
-    private gamma: number
-    private lam: number
-
-    private observationBuffer: number[][] = []
-    private actionBuffer: number[][] = []
-    private advantageBuffer: number[] = []
-    private rewardBuffer: number[] = []
-    private returnBuffer: number[] = []
-    private criticPredictionBuffer: number[] = []
-    private logProbabilityBuffer: number[] = []
-
-    private trajectoryStartIndex: number = 0
-    private ptr: number = 0
-
-    constructor(_: any, gamma?: number, lam?: number) {
-        this.gamma = gamma ?? 0.99
-        this.lam = lam ?? 0.95
-
-        this.reset()
-    }
-
-    add(
-        observation: number[],
-        action: number[],
-        reward: number,
-        criticPrediction: number,
-        logProbability: number,
-    ) {
-        this.observationBuffer.push(observation.slice(0))
-        this.actionBuffer.push(action)
-        this.rewardBuffer.push(reward)
-        this.criticPredictionBuffer.push(criticPrediction)
-        this.logProbabilityBuffer.push(logProbability)
-
-        this.ptr++
-    }
-
-    discountedCumulativeSums(values: number[], coefficient: number) {
-        const result = Array(values.length)
-        let sum = 0
-
-        for (let i = values.length - 1; i >= 0; i--) {
-            sum = values[i] + sum * coefficient
-            result[i] = sum
-        }
-
-        return result
-    }
-
-    finishTrajectory(lastValue: number) {
-        const rewards = this.rewardBuffer.slice(this.trajectoryStartIndex, this.ptr)
-        rewards.push(lastValue * this.gamma)
-
-        const values = this.criticPredictionBuffer.slice(this.trajectoryStartIndex, this.ptr)
-        values.push(lastValue)
-
-        const deltas = rewards
-            .slice(0, -1)
-            .map((reward, ri) => reward - (values[ri] - this.gamma * values[ri + 1]))
-
-        this.advantageBuffer.push(...this.discountedCumulativeSums(deltas, this.gamma * this.lam))
-        this.returnBuffer.push(...this.discountedCumulativeSums(rewards, this.gamma).slice(0, -1))
-
-        this.trajectoryStartIndex = this.ptr
-    }
-
-    get() {
-        const [advantageMean, advantageStd] = tf.tidy(() => [
-            tf.mean(this.advantageBuffer).arraySync(),
-            tf.moments(this.advantageBuffer).variance.sqrt().arraySync(),
-        ])
-
-        this.advantageBuffer = this.advantageBuffer.map(
-            advantage => (advantage - advantageMean) / advantageStd,
-        )
-
-        return [
-            this.observationBuffer,
-            this.actionBuffer,
-            this.advantageBuffer,
-            this.returnBuffer,
-            this.logProbabilityBuffer,
-        ]
-    }
-
-    reset() {
-        this.observationBuffer.length = 0
-        this.actionBuffer.length = 0
-        this.advantageBuffer.length = 0
-        this.rewardBuffer.length = 0
-        this.returnBuffer.length = 0
-        this.criticPredictionBuffer.length = 0
-        this.logProbabilityBuffer.length = 0
-
-        this.trajectoryStartIndex = 0
-        this.ptr = 0
-    }
-}
-
-class PPO {
-    constructor(env, config) {
-        const configDefault = {
-            nSteps: 512,
-            nEpochs: 10,
-            policyLearningRate: 1e-3,
-            valueLearningRate: 1e-3,
-            clipRatio: 0.2,
-            targetKL: 0.01,
-            useSDE: false, // TODO: State Dependent Exploration (gSDE)
-            netArch: {
-                pi: [32, 32],
-                vf: [32, 32],
-            },
-            activation: "relu",
-            verbose: 0,
-        }
-        this.config = Object.assign({}, configDefault, config)
-
-        // Prepare network architecture
-        if (Array.isArray(this.config.netArch)) {
-            this.config.netArch = {
-                pi: this.config.netArch,
-                vf: this.config.netArch,
-            }
-        }
-
-        // Initialize logger
-        this.log = (...args) => {
-            if (this.config.verbose > 0) {
-                console.log("[PPO]", ...args)
-            }
-        }
-
-        // Initialize environment
-        this.env = env
-        if (this.env.actionSpace.class == "Discrete" && !this.env.actionSpace.dtype) {
-            this.env.actionSpace.dtype = "int32"
-        } else if (this.env.actionSpace.class == "Box" && !this.env.actionSpace.dtype) {
-            this.env.actionSpace.dtype = "float32"
-        }
-
-        // Initialize counters
-        this.numTimesteps = 0
-        this.lastObservation = null
-
-        // Initialize buffer
-        this.buffer = new Buffer(config)
-
-        // Initialize models for actor and critic
-        this.actor = this.createActor()
-        this.critic = this.createCritic()
-
-        // Initialize logStd (for continuous action space)
-        if (this.env.actionSpace.class == "Box") {
-            this.logStd = tf.variable(tf.zeros([this.env.actionSpace.shape[0]]), true, "logStd")
-        }
-
-        // Initialize optimizers
-        this.optPolicy = tf.train.adam(this.config.policyLearningRate)
-        this.optValue = tf.train.adam(this.config.valueLearningRate)
-    }
-
-    createActor() {
-        const input = tf.layers.input({ shape: this.env.observationSpace.shape })
-        let l = input
-        this.config.netArch.pi.forEach((units, i) => {
-            l = tf.layers
-                .dense({
-                    units,
-                    activation: this.config.activation,
-                })
-                .apply(l)
-        })
-        if (this.env.actionSpace.class == "Discrete") {
-            l = tf.layers
-                .dense({
-                    units: this.env.actionSpace.n,
-                    // kernelInitializer: 'glorotNormal'
-                })
-                .apply(l)
-        } else if (this.env.actionSpace.class == "Box") {
-            l = tf.layers
-                .dense({
-                    units: this.env.actionSpace.shape[0],
-                    // kernelInitializer: 'glorotNormal'
-                })
-                .apply(l)
-        } else {
-            throw new Error("Unknown action space class: " + this.env.actionSpace.class)
-        }
-        return tf.model({ inputs: input, outputs: l })
-    }
-
-    createCritic() {
-        // Initialize critic
-        const input = tf.layers.input({ shape: this.env.observationSpace.shape })
-        let l = input
-        this.config.netArch.vf.forEach((units, i) => {
-            l = tf.layers
-                .dense({
-                    units: units,
-                    activation: this.config.activation,
-                })
-                .apply(l)
-        })
-        l = tf.layers
-            .dense({
-                units: 1,
-                activation: "linear",
-            })
-            .apply(l)
-        return tf.model({ inputs: input, outputs: l })
-    }
-
-    sampleAction(observationT) {
-        return tf.tidy(() => {
-            const preds = tf.squeeze(this.actor.predict(observationT), 0)
-            let action
-            if (this.env.actionSpace.class == "Discrete") {
-                action = tf.squeeze(tf.multinomial(preds, 1), 0) // > []
-            } else if (this.env.actionSpace.class == "Box") {
-                action = tf.add(
-                    tf.mul(
-                        tf.randomStandardNormal([this.env.actionSpace.shape[0]]),
-                        tf.exp(this.logStd),
-                    ),
-                    preds,
-                ) // > [actionSpace.shape[0]]
-            }
-            return [preds, action]
-        })
-    }
-
-    logProbCategorical(logits, x) {
-        return tf.tidy(() => {
-            const numActions = logits.shape[logits.shape.length - 1]
-            const logprobabilitiesAll = tf.logSoftmax(logits)
-            return tf.sum(
-                tf.mul(tf.oneHot(x, numActions), logprobabilitiesAll),
-                logprobabilitiesAll.shape.length - 1,
-            )
-        })
-    }
-
-    logProbNormal(loc, scale, x) {
-        return tf.tidy(() => {
-            const logUnnormalized = tf.mul(
-                -0.5,
-                tf.square(tf.sub(tf.div(x, scale), tf.div(loc, scale))),
-            )
-            const logNormalization = tf.add(tf.scalar(0.5 * Math.log(2.0 * Math.PI)), tf.log(scale))
-            return tf.sum(
-                tf.sub(logUnnormalized, logNormalization),
-                logUnnormalized.shape.length - 1,
-            )
-        })
-    }
-
-    logProb(preds, actions) {
-        // Preds can be logits or means
-        if (this.env.actionSpace.class == "Discrete") {
-            return this.logProbCategorical(preds, actions)
-        } else if (this.env.actionSpace.class == "Box") {
-            return this.logProbNormal(preds, tf.exp(this.logStd), actions)
-        }
-    }
-
-    predict(observation, deterministic = false) {
-        return this.actor.predict(observation)
-    }
-
-    trainPolicy(observationBufferT, actionBufferT, logprobabilityBufferT, advantageBufferT) {
-        const optFunc = () => {
-            const predsT = this.actor.predict(observationBufferT) // -> Logits or means
-            const diffT = tf.sub(this.logProb(predsT, actionBufferT), logprobabilityBufferT)
-            const ratioT = tf.exp(diffT)
-            const minAdvantageT = tf.where(
-                tf.greater(advantageBufferT, 0),
-                tf.mul(tf.add(1, this.config.clipRatio), advantageBufferT),
-                tf.mul(tf.sub(1, this.config.clipRatio), advantageBufferT),
-            )
-            const policyLoss = tf.neg(
-                tf.mean(tf.minimum(tf.mul(ratioT, advantageBufferT), minAdvantageT)),
-            )
-            return policyLoss
-        }
-
-        return tf.tidy(() => {
-            const { values, grads } = this.optPolicy.computeGradients(optFunc)
-            this.optPolicy.applyGradients(grads)
-            const kl = tf.mean(
-                tf.sub(
-                    logprobabilityBufferT,
-                    this.logProb(this.actor.predict(observationBufferT), actionBufferT),
-                ),
-            )
-            return kl.arraySync()
-        })
-    }
-
-    trainValue(observationBufferT, returnBufferT) {
-        const optFunc = () => {
-            const valuesPredT = this.critic.predict(observationBufferT)
-            return tf.losses.meanSquaredError(returnBufferT, valuesPredT)
-        }
-
-        tf.tidy(() => {
-            const { values, grads } = this.optValue.computeGradients(optFunc)
-            this.optValue.applyGradients(grads)
-        })
-    }
-
-    _initCallback(callback) {
-        // Function, not class
-        if (typeof callback === "function") {
-            if (callback.prototype.constructor === undefined) {
-                return new FunctionalCallback(callback)
-            }
-            return callback
-        }
-        if (typeof callback === "object") {
-            return new DictCallback(callback)
-        }
-        return new BaseCallback()
-    }
-
-    async collectRollouts(callback) {
-        if (this.lastObservation === null) {
-            this.lastObservation = this.env.reset()
-        }
-
-        this.buffer.reset()
-        callback.onRolloutStart(this)
-
-        let sumReturn = 0
-        let sumLength = 0
-        let numEpisodes = 0
-
-        const allPreds = []
-        const allActions = []
-        const allClippedActions = []
-
-        for (let step = 0; step < this.config.nSteps; step++) {
-            // Predict action, value and logprob from last observation
-            const [preds, action, value, logprobability] = tf.tidy(() => {
-                const lastObservationT = tf.tensor([this.lastObservation])
-                const [predsT, actionT] = this.sampleAction(lastObservationT)
-                const valueT = this.critic.predict(lastObservationT)
-                const logprobabilityT = this.logProb(predsT, actionT)
-                return [
-                    predsT.arraySync(), // -> Discrete: [actionSpace.n] or Box: [actionSpace.shape[0]]
-                    actionT.arraySync(), // -> Discrete: [] or Box: [actionSpace.shape[0]]
-                    valueT.arraySync()[0][0],
-                    logprobabilityT.arraySync(),
-                ]
-            })
-            allPreds.push(preds)
-            allActions.push(action)
-
-            // Rescale for continuous action space
-            let clippedAction = action
-            if (this.env.actionSpace.class == "Box") {
-                let h = this.env.actionSpace.high
-                let l = this.env.actionSpace.low
-                if (typeof h === "number" && typeof l === "number") {
-                    clippedAction = action.map(a => {
-                        return Math.min(Math.max(a, l), h)
-                    })
-                }
-            }
-            allClippedActions.push(clippedAction)
-
-            // Take action in environment
-            const [newObservation, reward, done] = await this.env.step(clippedAction)
-            sumReturn += reward
-            sumLength += 1
-
-            // Update global timestep counter
-            this.numTimesteps += 1
-
-            callback.onStep(this)
-
-            this.buffer.add(this.lastObservation, action, reward, value, logprobability)
-
-            this.lastObservation = newObservation
-
-            if (done || step === this.config.nSteps - 1) {
-                const lastValue = done
-                    ? 0
-                    : tf.tidy(() =>
-                          this.critic.predict(tf.tensor([newObservation])).arraySync(),
-                      )[0][0]
-                this.buffer.finishTrajectory(lastValue)
-                numEpisodes += 1
-                this.lastObservation = this.env.reset()
-            }
-        }
-
-        callback.onRolloutEnd(this)
-    }
-
-    async train(config) {
-        // Get values from the buffer
-        const [
-            observationBuffer,
-            actionBuffer,
-            advantageBuffer,
-            returnBuffer,
-            logprobabilityBuffer,
-        ] = this.buffer.get()
-
-        const [
-            observationBufferT,
-            actionBufferT,
-            advantageBufferT,
-            returnBufferT,
-            logprobabilityBufferT,
-        ] = tf.tidy(() => [
-            tf.tensor(observationBuffer),
-            tf.tensor(actionBuffer, null, this.env.actionSpace.dtype),
-            tf.tensor(advantageBuffer),
-            tf.tensor(returnBuffer).reshape([-1, 1]),
-            tf.tensor(logprobabilityBuffer),
-        ])
-
-        for (let i = 0; i < this.config.nEpochs; i++) {
-            const kl = this.trainPolicy(
-                observationBufferT,
-                actionBufferT,
-                logprobabilityBufferT,
-                advantageBufferT,
-            )
-            if (kl > 1.5 * this.config.targetKL) {
-                break
-            }
-        }
-
-        for (let i = 0; i < this.config.nEpochs; i++) {
-            this.trainValue(observationBufferT, returnBufferT)
-        }
-
-        tf.dispose([
-            observationBufferT,
-            actionBufferT,
-            advantageBufferT,
-            returnBufferT,
-            logprobabilityBufferT,
-        ])
-    }
-
-    async learn(learnConfig) {
-        const learnConfigDefault = {
-            totalTimesteps: 1000,
-            logInterval: 1,
-            callback: null,
-        }
-        let { totalTimesteps, logInterval, callback } = Object.assign(
-            {},
-            learnConfigDefault,
-            learnConfig,
-        )
-
-        callback = this._initCallback(callback)
-
-        let iteration = 0
-
-        callback.onTrainingStart(this)
-
-        while (this.numTimesteps < totalTimesteps) {
-            await this.collectRollouts(callback)
-            iteration += 1
-            if (logInterval && iteration % logInterval === 0) {
-                log(`Timesteps: ${this.numTimesteps}`)
-            }
-            this.train()
-        }
-
-        callback.onTrainingEnd(this)
-    }
-}
-
-if (typeof module === "object" && module.exports) {
-    module.exports = PPO
-}
diff --git a/packages/web-game/src/game/game-agent-as-player.ts b/packages/web-game/src/game/game-agent-as-player.ts
index 6c4bc872..6f247769 100644
--- a/packages/web-game/src/game/game-agent-as-player.ts
+++ b/packages/web-game/src/game/game-agent-as-player.ts
@@ -3,12 +3,14 @@ import { GameInterface } from "./game"
 import { GameAgentWrapper } from "./game-agent-wrapper"
 import { GameSettings } from "./game-settings"
 import { ModuleInput } from "./modules/module-input/module-input"
+import { DefaultGameReward, Reward } from "./reward/default-reward"
 import { ExtendedRuntime, newExtendedRuntime } from "./runtime-extension/new-extended-runtime"
 
 export class GameAgentAsPlayer implements GameInterface {
     runtime: ExtendedRuntime
     input: ModuleInput
     gameWrapper: GameAgentWrapper
+    reward: Reward
 
     constructor(settings: GameSettings) {
         settings.canvas.width = 64
@@ -27,6 +29,8 @@ export class GameAgentAsPlayer implements GameInterface {
         this.gameWrapper = new GameAgentWrapper(this.runtime, this.runtime.factoryContext.scene)
 
         this.input = new ModuleInput(this.runtime)
+
+        this.reward = new DefaultGameReward(this.runtime)
     }
 
     dispose() {
@@ -43,7 +47,11 @@ export class GameAgentAsPlayer implements GameInterface {
             rotation: this.input.rotation(),
         }
 
-        this.gameWrapper.step(context)
+        const [reward, done] = this.reward.next(() => {
+            this.gameWrapper.step(context)
+        })
+
+        console.log("Reward:", reward, "Done:", done)
 
         this.runtime.factoryContext.renderer.render(
             this.runtime.factoryContext.scene,
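The `Reward` contract consumed above (and defined in the new file below) wraps a block of simulation stepping and yields a `[reward, done]` pair. A minimal sketch of an alternative implementation against that contract — the survival-bonus logic is purely illustrative and not part of this change; only the `next(steps)` shape comes from the diff:

    import { Reward } from "web-game/src/game/reward/default-reward"

    // Illustrative only: pays a tiny living bonus per frame and never terminates.
    class SurvivalReward implements Reward {
        next(steps: () => void): [number, boolean] {
            steps() // advance the wrapped simulation first, as DefaultGameReward does
            return [0.1, false]
        }
    }

Since `RewardFactory` is `(game: Runtime) => Reward`, a matching factory would simply be `runtime => new SurvivalReward()`, ignoring its argument.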
diff --git a/packages/web-game/src/game/reward/default-reward.ts b/packages/web-game/src/game/reward/default-reward.ts
new file mode 100644
index 00000000..7902e78c
--- /dev/null
+++ b/packages/web-game/src/game/reward/default-reward.ts
@@ -0,0 +1,100 @@
+import { EntityWith, MessageCollector } from "runtime-framework"
+import { LevelCapturedMessage } from "runtime/src/core/level-capture/level-captured-message"
+import { RocketDeathMessage } from "runtime/src/core/rocket/rocket-death-message"
+import { RuntimeComponents } from "runtime/src/core/runtime-components"
+import { Runtime } from "runtime/src/runtime"
+
+export interface Reward {
+    next: (steps: () => void) => [number, boolean]
+}
+
+export type RewardFactory = (game: Runtime) => Reward
+
+export class DefaultGameReward implements Reward {
+    private captureCollector: MessageCollector<LevelCapturedMessage>
+    private deathCollector: MessageCollector<RocketDeathMessage>
+
+    private rocket: EntityWith<RuntimeComponents, "rocket" | "rigidBody">
+    private nextLevel: EntityWith<RuntimeComponents, "level">
+
+    private previousDistanceToLevel: number
+    private distanceToReward: number
+
+    private steps: number
+    private maxSteps: number
+
+    constructor(private runtime: Runtime) {
+        this.captureCollector = runtime.factoryContext.messageStore.collect("levelCaptured")
+        this.deathCollector = runtime.factoryContext.messageStore.collect("rocketDeath")
+
+        this.rocket = runtime.factoryContext.store.find("rocket", "rigidBody")[0]
+
+        // next level is the nearest level that is not yet captured
+        this.nextLevel = nextFlag(runtime, this.rocket)
+
+        this.previousDistanceToLevel = this.findDistanceToFlag(this.nextLevel)
+        this.distanceToReward = 16 / this.previousDistanceToLevel
+
+        this.steps = 0
+        this.maxSteps = 15 * 20 * 4 // 20 seconds (15 fps * 4 steps per frame)
+    }
+
+    next(steps: () => void): [number, boolean] {
+        let reward = 0
+        steps()
+        ++this.steps
+
+        if (this.steps >= this.maxSteps) {
+            return [-128, true]
+        }
+
+        for (const message of this.deathCollector) {
+            return [-32, true]
+        }
+
+        const distanceToFlag = this.findDistanceToFlag(this.nextLevel)
+
+        if (distanceToFlag < this.previousDistanceToLevel) {
+            reward += this.distanceToReward * (this.previousDistanceToLevel - distanceToFlag)
+            this.previousDistanceToLevel = distanceToFlag
+        }
+
+        if (this.nextLevel.components.level.inCapture) {
+            reward += 4
+        }
+
+        for (const message of this.captureCollector) {
+            reward += 512
+            this.nextLevel = nextFlag(this.runtime, this.rocket)
+            this.previousDistanceToLevel = this.findDistanceToFlag(this.nextLevel)
+        }
+
+        return [reward, false]
+    }
+
+    findDistanceToFlag(flagEntity: EntityWith<RuntimeComponents, "level">) {
+        const dx =
+            this.rocket.components.rigidBody.translation().x - flagEntity.components.level.flag.x
+        const dy =
+            this.rocket.components.rigidBody.translation().y - flagEntity.components.level.flag.y
+        return Math.sqrt(dx * dx + dy * dy)
+    }
+}
+
+function nextFlag(runtime: Runtime, rocket: EntityWith<RuntimeComponents, "rocket" | "rigidBody">) {
+    const distanceToFlag = (flagEntity: EntityWith<RuntimeComponents, "level">) => {
+        const dx = rocket.components.rigidBody.translation().x - flagEntity.components.level.flag.x
+        const dy = rocket.components.rigidBody.translation().y - flagEntity.components.level.flag.y
+        return Math.sqrt(dx * dx + dy * dy)
+    }
+
+    const nextLevel = runtime.factoryContext.store
+        .find("level")
+        .filter(level => !level.components.level.captured)
+        .map(level => [level, distanceToFlag(level)] as const)
+        .reduce(([minLevel, minDistance], [level, distance]) =>
+            distance < minDistance ? [level, distance] : [minLevel, minDistance],
+        )[0]
+
+    return nextLevel
+}
diff --git a/yarn.lock b/yarn.lock
index 7275dc58..93278179 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -2147,6 +2147,21 @@
   resolved "https://registry.yarnpkg.com/@mapbox/mapbox-gl-supported/-/mapbox-gl-supported-1.5.0.tgz#f60b6a55a5d8e5ee908347d2ce4250b15103dc8e"
   integrity sha512-/PT1P6DNf7vjEEiPkVIRJkvibbqWtqnyGaBz3nfRdcxclNSnSdaLU5tfAgcD7I8Yt5i+L19s406YLl1koLnLbg==
 
+"@mapbox/node-pre-gyp@1.0.9":
+  version "1.0.9"
+  resolved "https://registry.yarnpkg.com/@mapbox/node-pre-gyp/-/node-pre-gyp-1.0.9.tgz#09a8781a3a036151cdebbe8719d6f8b25d4058bc"
+  integrity sha512-aDF3S3rK9Q2gey/WAttUlISduDItz5BU3306M9Eyv6/oS40aMprnopshtlKTykxRNIBEZuRMaZAnbrQ4QtKGyw==
+  dependencies:
+    detect-libc "^2.0.0"
+    https-proxy-agent "^5.0.0"
+    make-dir "^3.1.0"
+    node-fetch "^2.6.7"
+    nopt "^5.0.0"
+    npmlog "^5.0.1"
+    rimraf "^3.0.2"
+    semver "^7.3.5"
+    tar "^6.1.11"
+
 "@mapbox/point-geometry@0.1.0", "@mapbox/point-geometry@^0.1.0", "@mapbox/point-geometry@~0.1.0":
   version "0.1.0"
   resolved "https://registry.yarnpkg.com/@mapbox/point-geometry/-/point-geometry-0.1.0.tgz#8a83f9335c7860effa2eeeca254332aa0aeed8f2"
@@ -3684,7 +3699,21 @@
   resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-layers/-/tfjs-layers-4.19.0.tgz#73b5a3f5580807d5d56188d9b6ad4658d810fefa"
   integrity sha512-NufvuRaZdIyoG+R13d7oL8G5Bywox+ihPMiMZ3tWU+me8C8Y0pVC69mrnhOS9R8an7GDxKKSTTNEZhUvPvMGiQ==
 
-"@tensorflow/tfjs@^4.19.0":
+"@tensorflow/tfjs-node@^4.19.0":
+  version "4.19.0"
+  resolved "https://registry.yarnpkg.com/@tensorflow/tfjs-node/-/tfjs-node-4.19.0.tgz#a922db9cd8284cee8eb7655e539dab12c32c272c"
+  integrity sha512-1HLIAuu5azP8SW7t5EZc1W5VOdjWndJYz1N1agz0It/tMtnuWIdAfcY08VjfuiI/NhAwuPShehqv6CZ3SYh+Vg==
+  dependencies:
+    "@mapbox/node-pre-gyp" "1.0.9"
+    "@tensorflow/tfjs" "4.19.0"
+    adm-zip "^0.5.2"
+    google-protobuf "^3.9.2"
+    https-proxy-agent "^2.2.1"
+    progress "^2.0.0"
+    rimraf "^2.6.2"
+    tar "^4.4.6"
+
+"@tensorflow/tfjs@4.19.0", "@tensorflow/tfjs@^4.19.0":
   version "4.19.0"
   resolved "https://registry.yarnpkg.com/@tensorflow/tfjs/-/tfjs-4.19.0.tgz#b7c2e7911d89770d2432428328e0db8922501f5f"
   integrity sha512-d2A1lTc6my7GJ5LwqzXa+igJ5+18exwsnaphZ3roi5nJ197uwxVSMIc2vSJnqZz1KajC5/mZgQr67EZrpTFlBg==
@@ -4226,6 +4255,11 @@
   resolved "https://registry.yarnpkg.com/@xobotyi/scrollbar-width/-/scrollbar-width-1.9.5.tgz#80224a6919272f405b87913ca13b92929bdf3c4d"
   integrity sha512-N8tkAACJx2ww8vFMneJmaAgmjAG1tnVBZJRLRcx061tmsLRZHSEZSLuGWnwPtunsSLvSqXQ2wfp7Mgqg1I+2dQ==
 
+abbrev@1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/abbrev/-/abbrev-1.1.1.tgz#f8f2c887ad10bf67f634f005b6987fed3179aac8"
+  integrity sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==
+
 abbrev@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/abbrev/-/abbrev-2.0.0.tgz#cf59829b8b4f03f89dda2771cb7f3653828c89bf"
@@ -4256,6 +4290,25 @@ acorn@^8.10.0, acorn@^8.11.3, acorn@^8.8.0, acorn@^8.8.2, acorn@^8.9.0:
   resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.11.3.tgz#71e0b14e13a4ec160724b38fb7b0f233b1b81d7a"
   integrity sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg==
 
+adm-zip@^0.5.2:
+  version "0.5.12"
+  resolved "https://registry.yarnpkg.com/adm-zip/-/adm-zip-0.5.12.tgz#87786328e91d54b37358d8a50f954c4cd73ba60b"
+  integrity sha512-6TVU49mK6KZb4qG6xWaaM4C7sA/sgUMLy/JYMOzkcp3BvVLpW0fXDFQiIzAuxFCt/2+xD7fNIiPFAoLZPhVNLQ==
+
+agent-base@6:
+  version "6.0.2"
+  resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-6.0.2.tgz#49fff58577cfee3f37176feab4c22e00f86d7f77"
+  integrity sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==
+  dependencies:
+    debug "4"
+
+agent-base@^4.3.0:
+  version "4.3.0"
+  resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-4.3.0.tgz#8165f01c436009bccad0b1d122f05ed770efc6ee"
+  integrity sha512-salcGninV0nPrwpGNn4VTXBb1SOuXQBiqbrNXoeizJsHrsL6ERFM2Ne3JUSBWRE6aeNJI2ROP/WEEIDUiDe3cg==
+  dependencies:
+    es6-promisify "^5.0.0"
+
 agent-base@^7.0.2, agent-base@^7.1.0, agent-base@^7.1.1:
   version "7.1.1"
   resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-7.1.1.tgz#bdbded7dfb096b751a2a087eeeb9664725b2e317"
@@ -4343,6 +4396,19 @@ anymatch@~3.1.2:
     normalize-path "^3.0.0"
     picomatch "^2.0.4"
 
+"aproba@^1.0.3 || ^2.0.0":
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/aproba/-/aproba-2.0.0.tgz#52520b8ae5b569215b354efc0caa3fe1e45a8adc"
+  integrity sha512-lYe4Gx7QT+MKGbDsA+Z+he/Wtef0BiwDOlK/XkBrdfsh9J/jPPXbX0tE9x9cl27Tmu5gg3QUbUrQYa/y+KOHPQ==
+
+are-we-there-yet@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/are-we-there-yet/-/are-we-there-yet-2.0.0.tgz#372e0e7bd279d8e94c653aaa1f67200884bf3e1c"
+  integrity sha512-Ci/qENmwHnsYo9xKIcUJN5LeDKdJ6R1Z1j9V/J5wyq8nh/mYPEpIKJbBZXtZjG04HiK7zV/p6Vs9952MrMeUIw==
+  dependencies:
+    delegates "^1.0.0"
+    readable-stream "^3.6.0"
+
 arg@^5.0.2:
   version "5.0.2"
   resolved "https://registry.yarnpkg.com/arg/-/arg-5.0.2.tgz#c81433cc427c92c4dcf4865142dbca6f15acd59c"
@@ -4809,7 +4875,7 @@ chokidar@^3.5.3:
   optionalDependencies:
     fsevents "~2.3.2"
 
-chownr@^1.1.1:
+chownr@^1.1.1, chownr@^1.1.4:
   version "1.1.4"
   resolved "https://registry.yarnpkg.com/chownr/-/chownr-1.1.4.tgz#6fc9d7b42d32a583596337666e7d08084da2cc6b"
   integrity sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==
@@ -4970,6 +5036,11 @@ color-space@^2.0.0:
   resolved "https://registry.yarnpkg.com/color-space/-/color-space-2.0.1.tgz#da39871175baf4a5785ba519397df04b8d67e0fa"
   integrity sha512-nKqUYlo0vZATVOFHY810BSYjmCARrG7e5R3UE3CQlyjJTvv5kSSmPG1kzm/oDyyqjehM+lW1RnEt9It9GNa5JA==
 
+color-support@^1.1.2:
+  version "1.1.3"
+  resolved "https://registry.yarnpkg.com/color-support/-/color-support-1.1.3.tgz#93834379a1cc9a0c61f82f52f0d04322251bd5a2"
+  integrity sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==
+
 combined-stream@^1.0.8:
   version "1.0.8"
   resolved "https://registry.yarnpkg.com/combined-stream/-/combined-stream-1.0.8.tgz#c3d45a8b34fd730631a110a8a2520682b31d5a7f"
@@ -5031,6 +5102,11 @@ confbox@^0.1.7:
   resolved "https://registry.yarnpkg.com/confbox/-/confbox-0.1.7.tgz#ccfc0a2bcae36a84838e83a3b7f770fb17d6c579"
   integrity sha512-uJcB/FKZtBMCJpK8MQji6bJHgu1tixKPxRLeGkNzBoOZzpnZUJm0jm2/sBDWcuBx1dYgxV4JU+g5hmNxCyAmdA==
 
+console-control-strings@^1.0.0, console-control-strings@^1.1.0:
+  version "1.1.0"
+  resolved "https://registry.yarnpkg.com/console-control-strings/-/console-control-strings-1.1.0.tgz#3d7cf4464db6446ea644bf4b39507f9851008e8e"
+  integrity sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==
+
 convert-source-map@^2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/convert-source-map/-/convert-source-map-2.0.0.tgz#4b560f649fc4e918dd0ab75cf4961e8bc882d82a"
@@ -5360,7 +5436,7 @@ debug@4, debug@^4.1.0, debug@^4.1.1, debug@^4.3.1, debug@^4.3.2, debug@^4.3.4:
   dependencies:
     ms "2.1.2"
 
-debug@^3.2.6:
+debug@^3.1.0, debug@^3.2.6:
   version "3.2.7"
   resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a"
   integrity sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==
@@ -5429,6 +5505,11 @@ delayed-stream@~1.0.0:
   resolved "https://registry.yarnpkg.com/delayed-stream/-/delayed-stream-1.0.0.tgz#df3ae199acadfb7d440aaae0b29e2272b24ec619"
   integrity sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==
 
+delegates@^1.0.0:
+  version "1.0.0"
+  resolved "https://registry.yarnpkg.com/delegates/-/delegates-1.0.0.tgz#84c6e159b81904fdca59a0ef44cd870d31250f9a"
+  integrity sha512-bd2L678uiWATM6m5Z1VzNCErI3jiGzt6HGY8OVICs40JQq/HALfbyNJmp0UDakEY4pMMaN0Ly5om/B1VI/+xfQ==
+
 depd@~2.0.0:
   version "2.0.0"
   resolved "https://registry.yarnpkg.com/depd/-/depd-2.0.0.tgz#b696163cc757560d09cf22cc8fad1571b79e76df"
@@ -5829,6 +5910,18 @@ es6-iterator@^2.0.3:
     es5-ext "^0.10.35"
     es6-symbol "^3.1.1"
 
+es6-promise@^4.0.3:
+  version "4.2.8"
+  resolved "https://registry.yarnpkg.com/es6-promise/-/es6-promise-4.2.8.tgz#4eb21594c972bc40553d276e510539143db53e0a"
+  integrity sha512-HJDGx5daxeIvxdBxvG2cb9g4tEvwIk3i8+nhX0yGrYmZUzbkdg8QbDevheDB8gd0//uPj4c1EQua8Q+MViT0/w==
+
+es6-promisify@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/es6-promisify/-/es6-promisify-5.0.0.tgz#5109d62f3e56ea967c4b63505aef08291c8a5203"
+  integrity sha512-C+d6UdsYDk0lMebHNR4S2NybQMMngAOnOwYBQjTOiv0MkoJMP0Myw2mgpDLBcpfCmRLxyFqYhS/CfOENq4SJhQ==
+  dependencies:
+    es6-promise "^4.0.3"
+
 es6-symbol@^3.1.1, es6-symbol@^3.1.3:
   version "3.1.4"
   resolved "https://registry.yarnpkg.com/es6-symbol/-/es6-symbol-3.1.4.tgz#f4e7d28013770b4208ecbf3e0bf14d3bcb557b8c"
@@ -6433,6 +6526,13 @@ fs-extra@^9.0.1:
     jsonfile "^6.0.1"
     universalify "^2.0.0"
 
+fs-minipass@^1.2.7:
+  version "1.2.7"
+  resolved "https://registry.yarnpkg.com/fs-minipass/-/fs-minipass-1.2.7.tgz#ccff8570841e7fe4265693da88936c55aed7f7c7"
+  integrity sha512-GWSSJGFy4e9GUeCcbIkED+bgAoFyj7XF1mV8rma3QW4NIqX9Kyx79N/PF61H5udOV3aY1IaMLs6pGbH71nlCTA==
+  dependencies:
+    minipass "^2.6.0"
+
 fs-minipass@^2.0.0:
   version "2.1.0"
   resolved "https://registry.yarnpkg.com/fs-minipass/-/fs-minipass-2.1.0.tgz#7f5036fdbf12c63c169190cbe4199c852271f9fb"
@@ -6482,6 +6582,21 @@ functions-have-names@^1.2.3:
   resolved "https://registry.yarnpkg.com/functions-have-names/-/functions-have-names-1.2.3.tgz#0404fe4ee2ba2f607f0e0ec3c80bae994133b834"
   integrity sha512-xckBUXyTIqT97tq2x2AMb+g163b5JFysYk0x4qxNFwbfQkmNZoiRHb6sPzI9/QV33WeuvVYBUIiD4NzNIyqaRQ==
 
+gauge@^3.0.0:
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/gauge/-/gauge-3.0.2.tgz#03bf4441c044383908bcfa0656ad91803259b395"
+  integrity sha512-+5J6MS/5XksCuXq++uFRsnUd7Ovu1XenbeuIuNRJxYWjgQbPuFhT14lAvsWfqfAmnwluf1OwMjz39HjfLPci0Q==
+  dependencies:
+    aproba "^1.0.3 || ^2.0.0"
+    color-support "^1.1.2"
+    console-control-strings "^1.0.0"
+    has-unicode "^2.0.1"
+    object-assign "^4.1.1"
+    signal-exit "^3.0.0"
+    string-width "^4.2.3"
+    strip-ansi "^6.0.1"
+    wide-align "^1.1.2"
+
 gensync@^1.0.0-beta.2:
   version "1.0.0-beta.2"
   resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.2.tgz#32a6ee76c3d7f52d46b2b1ae5d93fea8580a25e0"
@@ -6833,6 +6948,11 @@ glslify@^7.0.0:
     through2 "^2.0.1"
    xtend "^4.0.0"
 
+google-protobuf@^3.9.2:
+  version "3.21.2"
+  resolved "https://registry.yarnpkg.com/google-protobuf/-/google-protobuf-3.21.2.tgz#4580a2bea8bbb291ee579d1fefb14d6fa3070ea4"
+  integrity sha512-3MSOYFO5U9mPGikIYCzK0SaThypfGgS6bHqrUGXG3DPHCrb+txNqeEcns1W0lkGfk0rCyNXm7xB9rMxnCiZOoA==
+
 gopd@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/gopd/-/gopd-1.0.1.tgz#29ff76de69dac7489b7c0918a5788e56477c332c"
@@ -6916,6 +7036,11 @@ has-tostringtag@^1.0.0, has-tostringtag@^1.0.2:
   dependencies:
     has-symbols "^1.0.3"
 
+has-unicode@^2.0.1:
+  version "2.0.1"
+  resolved "https://registry.yarnpkg.com/has-unicode/-/has-unicode-2.0.1.tgz#e0e6fe6a28cf51138855e086d1691e771de2a8b9"
+  integrity sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==
+
 hasown@^2.0.0, hasown@^2.0.1, hasown@^2.0.2:
   version "2.0.2"
   resolved "https://registry.yarnpkg.com/hasown/-/hasown-2.0.2.tgz#003eaf91be7adc372e84ec59dc37252cedb80003"
@@ -6956,6 +7081,22 @@ http-proxy-agent@^7.0.0:
     agent-base "^7.1.0"
     debug "^4.3.4"
 
+https-proxy-agent@^2.2.1:
+  version "2.2.4"
+  resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-2.2.4.tgz#4ee7a737abd92678a293d9b34a1af4d0d08c787b"
+  integrity sha512-OmvfoQ53WLjtA9HeYP9RNrWMJzzAz1JGaSFr1nijg0PVR1JaD/xbJq1mdEIIlxGpXp9eSe/O2LgU9DJmTPd0Eg==
+  dependencies:
+    agent-base "^4.3.0"
+    debug "^3.1.0"
+
+https-proxy-agent@^5.0.0:
+  version "5.0.1"
+  resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz#c59ef224a04fe8b754f3db0063a25ea30d0005d6"
+  integrity sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==
+  dependencies:
+    agent-base "6"
+    debug "4"
+
 https-proxy-agent@^7.0.1:
   version "7.0.4"
   resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz#8e97b841a029ad8ddc8731f26595bad868cb4168"
@@ -7693,6 +7834,13 @@ magic-string@^0.30.0, magic-string@^0.30.1:
   dependencies:
     "@jridgewell/sourcemap-codec" "^1.4.15"
 
+make-dir@^3.1.0:
+  version "3.1.0"
+  resolved "https://registry.yarnpkg.com/make-dir/-/make-dir-3.1.0.tgz#415e967046b3a7f1d185277d84aa58203726a13f"
+  integrity sha512-g3FeP20LNwhALb/6Cz6Dd4F2ngze0jz7tbzrD2wAV+o9FeNHe4rL+yK2md0J/fiSf1sa1ADhXqi5+oVwOM/eGw==
+  dependencies:
+    semver "^6.0.0"
+
 make-fetch-happen@^13.0.0:
   version "13.0.1"
   resolved "https://registry.yarnpkg.com/make-fetch-happen/-/make-fetch-happen-13.0.1.tgz#273ba2f78f45e1f3a6dca91cede87d9fa4821e36"
@@ -7905,6 +8053,14 @@ minipass-sized@^1.0.3:
   dependencies:
     minipass "^3.0.0"
 
+minipass@^2.6.0, minipass@^2.9.0:
+  version "2.9.0"
+  resolved "https://registry.yarnpkg.com/minipass/-/minipass-2.9.0.tgz#e713762e7d3e32fed803115cf93e04bca9fcc9a6"
+  integrity sha512-wxfUjg9WebH+CUDX/CdbRlh5SmfZiy/hpkxaRI16Y9W56Pa75sWgd/rvFilSgrauD9NyFymP/+JFV3KwzIsJeg==
+  dependencies:
+    safe-buffer "^5.1.2"
+    yallist "^3.0.0"
+
 minipass@^3.0.0:
   version "3.3.6"
   resolved "https://registry.yarnpkg.com/minipass/-/minipass-3.3.6.tgz#7bba384db3a1520d18c9c0e5251c3444e95dd94a"
@@ -7922,6 +8078,13 @@ minipass@^5.0.0:
   resolved "https://registry.yarnpkg.com/minipass/-/minipass-7.1.0.tgz#b545f84af94e567386770159302ca113469c80b8"
   integrity sha512-oGZRv2OT1lO2UF1zUcwdTb3wqUwI0kBGTgt/T7OdSj6M6N5m3o5uPf0AIW6lVxGGoiWUR7e2AwTE+xiwK8WQig==
 
+minizlib@^1.3.3:
+  version "1.3.3"
+  resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-1.3.3.tgz#2290de96818a34c29551c8a8d301216bd65a861d"
+  integrity sha512-6ZYMOEnmVsdCeTJVE0W9ZD+pVnE8h9Hma/iOwwRDsdQoePpoX56/8B6z3P9VNwppJuBKNRuFDRNRqRWexT9G9Q==
+  dependencies:
+    minipass "^2.9.0"
+
 minizlib@^2.1.1, minizlib@^2.1.2:
   version "2.1.2"
   resolved "https://registry.yarnpkg.com/minizlib/-/minizlib-2.1.2.tgz#e90d3466ba209b932451508a11ce3d3632145931"
@@ -7935,6 +8098,13 @@ mkdirp-classic@^0.5.2, mkdirp-classic@^0.5.3:
   resolved "https://registry.yarnpkg.com/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz#fa10c9115cc6d8865be221ba47ee9bed78601113"
   integrity sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==
 
+mkdirp@^0.5.5:
+  version "0.5.6"
+  resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-0.5.6.tgz#7def03d2432dcae4ba1d611445c48396062255f6"
+  integrity sha512-FP+p8RB8OWpF3YZBCrP5gtADmtXApB5AMLn+vdyA+PyxCjrCs00mjyUozssO33cwDeT3wNGdLxJ5M//YqtHAJw==
+  dependencies:
+    minimist "^1.2.6"
+
 mkdirp@^1.0.3:
   version "1.0.4"
   resolved "https://registry.yarnpkg.com/mkdirp/-/mkdirp-1.0.4.tgz#3eb5ed62622756d79a5f0e2a221dfebad75c2f7e"
@@ -8100,6 +8270,13 @@ node-abi@^3.3.0, node-abi@^3.56.0:
   dependencies:
     semver "^7.3.5"
 
+node-fetch@^2.6.7:
+  version "2.7.0"
+  resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.7.0.tgz#d0f0fa6e3e2dc1d27efcd8ad99d550bda94d187d"
+  integrity sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==
+  dependencies:
+    whatwg-url "^5.0.0"
+
 node-fetch@~2.6.1:
   version "2.6.13"
   resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.13.tgz#a20acbbec73c2e09f9007de5cda17104122e0010"
@@ -8133,6 +8310,13 @@ node-releases@^2.0.14:
   resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-2.0.14.tgz#2ffb053bceb8b2be8495ece1ab6ce600c4461b0b"
   integrity sha512-y10wOWt8yZpqXmOgRo77WaHEmhYQYGNA6y421PKsKYWEK8aW+cqAphborZDhqfyKrbZEN92CN1X2KbafY2s7Yw==
 
+nopt@^5.0.0:
+  version "5.0.0"
+  resolved "https://registry.yarnpkg.com/nopt/-/nopt-5.0.0.tgz#530942bb58a512fccafe53fe210f13a25355dc88"
+  integrity sha512-Tbj67rffqceeLpcRXrT7vKAN8CwfPeIBgM7E6iBkmKLV7bEMwpGgYLGv0jACUsECaa/vuxP0IjEont6umdMgtQ==
+  dependencies:
+    abbrev "1"
+
 nopt@^7.0.0:
   version "7.2.1"
   resolved "https://registry.yarnpkg.com/nopt/-/nopt-7.2.1.tgz#1cac0eab9b8e97c9093338446eddd40b2c8ca1e7"
@@ -8162,6 +8346,16 @@ normalize-svg-path@~0.1.0:
   resolved "https://registry.yarnpkg.com/normalize-svg-path/-/normalize-svg-path-0.1.0.tgz#456360e60ece75fbef7b5d7e160480e7ffd16fe5"
   integrity sha512-1/kmYej2iedi5+ROxkRESL/pI02pkg0OBnaR4hJkSIX6+ORzepwbuUXfrdZaPjysTsJInj0Rj5NuX027+dMBvA==
 
+npmlog@^5.0.1:
+  version "5.0.1"
+  resolved "https://registry.yarnpkg.com/npmlog/-/npmlog-5.0.1.tgz#f06678e80e29419ad67ab964e0fa69959c1eb8b0"
+  integrity sha512-AqZtDUWOMKs1G/8lwylVjrdYgqA4d9nu8hc+0gzRxlDb1I10+FHBGMXs6aiQHFdCUUlqH99MUMuLfzWDNDtfxw==
+  dependencies:
+    are-we-there-yet "^2.0.0"
+    console-control-strings "^1.1.0"
+    gauge "^3.0.0"
+    set-blocking "^2.0.0"
+
 number-is-integer@^1.0.1:
   version "1.0.1"
   resolved "https://registry.yarnpkg.com/number-is-integer/-/number-is-integer-1.0.1.tgz#e59bca172ffed27318e79c7ceb6cb72c095b2152"
@@ -8689,6 +8883,11 @@ process-nextick-args@~2.0.0:
   resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2"
   integrity sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==
 
+progress@^2.0.0:
+  version "2.0.3"
+  resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"
+  integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==
+
 promise-retry@^2.0.1:
   version "2.0.1"
   resolved "https://registry.yarnpkg.com/promise-retry/-/promise-retry-2.0.1.tgz#ff747a13620ab57ba688f5fc67855410c370da22"
@@ -8958,7 +9157,7 @@ readable-stream@^2.0.0, readable-stream@^2.1.4, readable-stream@^2.2.2, readable
     string_decoder "~1.1.1"
     util-deprecate "~1.0.1"
 
-readable-stream@^3.1.1, readable-stream@^3.4.0:
+readable-stream@^3.1.1, readable-stream@^3.4.0, readable-stream@^3.6.0:
   version "3.6.2"
   resolved "https://registry.yarnpkg.com/readable-stream/-/readable-stream-3.6.2.tgz#56a9b36ea965c00c5a93ef31eb111a0f11056967"
   integrity sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==
@@ -9195,6 +9394,13 @@ right-now@^1.0.0:
   resolved "https://registry.yarnpkg.com/right-now/-/right-now-1.0.0.tgz#6e89609deebd7dcdaf8daecc9aea39cf585a0918"
   integrity sha512-DA8+YS+sMIVpbsuKgy+Z67L9Lxb1p05mNxRpDPNksPDEFir4vmBlUtuN9jkTGn9YMMdlBuK7XQgFiz6ws+yhSg==
 
+rimraf@^2.6.2:
+  version "2.7.1"
+  resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-2.7.1.tgz#35797f13a7fdadc566142c29d4f07ccad483e3ec"
+  integrity sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w==
+  dependencies:
+    glob "^7.1.3"
+
 rimraf@^3.0.2:
   version "3.0.2"
   resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a"
@@ -9303,7 +9509,7 @@ safe-buffer@5.1.2, safe-buffer@~5.1.0, safe-buffer@~5.1.1:
   resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.1.2.tgz#991ec69d296e0313747d59bdfd2b745c35f8828d"
   integrity sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==
 
-safe-buffer@^5.0.1, safe-buffer@^5.1.0, safe-buffer@^5.1.1, safe-buffer@~5.2.0:
+safe-buffer@^5.0.1, safe-buffer@^5.1.0, safe-buffer@^5.1.1, safe-buffer@^5.1.2, safe-buffer@^5.2.1, safe-buffer@~5.2.0:
   version "5.2.1"
   resolved "https://registry.yarnpkg.com/safe-buffer/-/safe-buffer-5.2.1.tgz#1eaf9fa9bdb1fdd4ec75f58f9cdb4e6b7827eec6"
   integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==
@@ -9364,7 +9570,7 @@ selfsigned@^2.0.1:
     "@types/node-forge" "^1.3.0"
     node-forge "^1"
 
-semver@^6.3.1:
+semver@^6.0.0, semver@^6.3.1:
   version "6.3.1"
   resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4"
   integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==
@@ -9383,6 +9589,11 @@ serialize-javascript@^6.0.1:
   dependencies:
     randombytes "^2.1.0"
 
+set-blocking@^2.0.0:
+  version "2.0.0"
+  resolved "https://registry.yarnpkg.com/set-blocking/-/set-blocking-2.0.0.tgz#045f9782d011ae9a6803ddd382b24392b3d890f7"
+  integrity sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==
+
 set-function-length@^1.2.1:
   version "1.2.2"
   resolved "https://registry.yarnpkg.com/set-function-length/-/set-function-length-1.2.2.tgz#aac72314198eaed975cf77b2c3b6b880695e5449"
@@ -9442,6 +9653,11 @@ siginfo@^2.0.0:
   resolved "https://registry.yarnpkg.com/siginfo/-/siginfo-2.0.0.tgz#32e76c70b79724e3bb567cb9d543eb858ccfaf30"
   integrity sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==
 
+signal-exit@^3.0.0:
+  version "3.0.7"
+  resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-3.0.7.tgz#a9a1767f8af84155114eaabd73f99273c8f59ad9"
+  integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==
+
 signal-exit@^4.0.1:
   version "4.1.0"
   resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-4.1.0.tgz#952188c1cbd546070e2dd20d0f41c0ae0530cb04"
@@ -9660,7 +9876,7 @@ string-split-by@^1.0.0:
   dependencies:
     parenthesis "^3.1.5"
 
-"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.1.0, string-width@^4.2.0:
+"string-width-cjs@npm:string-width@^4.2.0", "string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
   version "4.2.3"
   resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
   integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
@@ -9954,6 +10170,19 @@ tar-stream@^2.1.4:
     inherits "^2.0.3"
     readable-stream "^3.1.1"
 
+tar@^4.4.6:
+  version "4.4.19"
+  resolved "https://registry.yarnpkg.com/tar/-/tar-4.4.19.tgz#2e4d7263df26f2b914dee10c825ab132123742f3"
+  integrity sha512-a20gEsvHnWe0ygBY8JbxoM4w3SJdhc7ZAuxkLqh+nvNQN2IOt0B5lLgM490X5Hl8FF0dl0tOf2ewFYAlIFgzVA==
+  dependencies:
+    chownr "^1.1.4"
+    fs-minipass "^1.2.7"
+    minipass "^2.9.0"
+    minizlib "^1.3.3"
+    mkdirp "^0.5.5"
+    safe-buffer "^5.2.1"
+    yallist "^3.1.1"
+
 tar@^6.1.11, tar@^6.1.2:
   version "6.2.1"
   resolved "https://registry.yarnpkg.com/tar/-/tar-6.2.1.tgz#717549c541bc3c2af15751bea94b1dd068d4b03a"
@@ -10805,6 +11034,13 @@ why-is-node-running@^2.2.2:
     siginfo "^2.0.0"
     stackback "0.0.2"
 
+wide-align@^1.1.2:
+  version "1.1.5"
+  resolved "https://registry.yarnpkg.com/wide-align/-/wide-align-1.1.5.tgz#df1d4c206854369ecf3c9a4898f1b23fbd9d15d3"
+  integrity sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg==
+  dependencies:
+    string-width "^1.0.2 || 2 || 3 || 4"
+
 word-wrap@^1.2.5:
   version "1.2.5"
   resolved "https://registry.yarnpkg.com/word-wrap/-/word-wrap-1.2.5.tgz#d2c45c6dd4fbce621a66f136cbe328afd0410b34"
@@ -11071,7 +11307,7 @@ y18n@^5.0.5:
   resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.8.tgz#7f4934d0f7ca8c56f95314939ddcd2dd91ce1d55"
   integrity sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==
 
-yallist@^3.0.2:
+yallist@^3.0.0, yallist@^3.0.2, yallist@^3.1.1:
   version "3.1.1"
   resolved "https://registry.yarnpkg.com/yallist/-/yallist-3.1.1.tgz#dbb7daf9bfd8bac9ab45ebf602b8cbad0d5d08fd"
   integrity sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==