diff --git a/Reinforcement Learning/LunarLander/lunarlander_random-sampling.ipynb b/Reinforcement Learning/LunarLander/lunarlander_random-sampling.ipynb index 3fa5614..07f6153 100644 --- a/Reinforcement Learning/LunarLander/lunarlander_random-sampling.ipynb +++ b/Reinforcement Learning/LunarLander/lunarlander_random-sampling.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -33,7 +33,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -54,12 +54,57 @@ " \n", " return actions\n", "\n", + "def get_random_rewards(env, seed):\n", + " actions = []\n", + " rewards = []\n", + "\n", + " env.seed(seed)\n", + " env.reset()\n", + "\n", + " step=0\n", + " while True:\n", + " step+=1\n", + " action = env.action_space.sample()\n", + " ob, reward, terminated, info = env.step(action)\n", + " actions.append(action)\n", + " rewards.append(reward)\n", + " if terminated: \n", + " break\n", + " \n", + " return rewards, actions\n", + "\n", + "\n", + "def explore_rewards(env, change_node, actions, old_rewards):\n", + " env.seed(seed)\n", + " env.reset()\n", + "\n", + " new_actions = []\n", + " rewards = old_rewards[:-change_node]\n", + " \n", + " step=0\n", + " for action in actions[:-change_node]:\n", + " step+=1\n", + " ob, reward, terminated, info = env.step(action)\n", + " new_actions.append(action)\n", + " if terminated: break\n", + "\n", + " if not terminated:\n", + " # print(\"continue to explore\", len(new_actions))\n", + " while True:\n", + " step+=1\n", + " action = env.action_space.sample()\n", + " ob, reward, terminated, info = env.step(action)\n", + " new_actions.append(action), rewards.append(reward)\n", + " if terminated: break\n", "\n", + " return rewards, new_actions\n", + " \n", "def explore_actions(env, change_node, actions):\n", " env.seed(seed)\n", " env.reset()\n", "\n", " new_actions = []\n", + "\n", " step=0\n", " for action in actions[:-change_node]:\n", " step+=1\n", @@ -79,27 +124,31 @@ " return new_actions\n", "\n", "\n", - "def find_best_actions(env, actions):\n", + "def find_best_rewards(n, env, actions, rewards):\n", " best_actions=[]\n", - " best_obs = []\n", - " for ep in range(30):\n", - " # print(f\" {ep} \".center(80, '*'))\n", + " best_rewards = []\n", + " for ep in range(n):\n", + " print(f\" {ep} \".center(80, '*'))\n", " change_node=1\n", - " if len(actions) == 500:\n", - " return actions\n", + " # if len(actions) == 500:\n", + " # return actions\n", " if len(best_actions)>0:\n", " actions = best_actions\n", + " if len(best_rewards)>1:\n", + " rewards = best_rewards\n", " best_actions = []\n", - " while change_node<len(actions):\n", - " new_actions = explore_actions(env, change_node, actions)\n", - " if len(new_actions)>len(actions):\n", + " best_rewards = [-1e6] \n", + " while change_node<len(actions):\n", + " new_rewards, new_actions = explore_rewards(env, change_node, actions, rewards)\n", + " if sum(new_rewards)>sum(best_rewards):\n", " # print(len(new_actions), len(actions), change_node)\n", - " if len(new_actions)>len(best_actions):\n", - " # print(len(new_actions), len(actions), change_node)\n", - " best_actions=new_actions\n", + " #if len(new_actions)>len(best_actions):\n", + " print(len(new_actions), len(actions), change_node, sum(new_rewards))\n", + " best_actions=new_actions\n", + " best_rewards=new_rewards\n", " change_node+=1\n", " \n", - " return best_actions if len(best_actions)>len(actions) else actions\n", + " return best_actions, best_rewards if len(best_actions)>len(actions) else actions\n", "\n", "\n", "def get_obs(env, actions_result):\n", @@ -123,54 +172,97 @@ }, { "cell_type": "code", - "execution_count": 6, + 
"execution_count": 11, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "************************************** 0 ***************************************\n", + "102 102 1 -149.00837859088466\n", + "102 102 5 -141.56807267363666\n", + "102 102 16 -128.0574026152969\n", + "102 102 24 -125.55624193964569\n", + "104 102 28 -123.26043672630061\n", + "103 102 30 -115.39515325051048\n", + "124 102 32 -13.980617247773807\n", + "120 102 34 10.231381970022568\n", + "************************************** 1 ***************************************\n", + "120 120 1 10.231381970022568\n", + "120 120 4 11.686774316464266\n", + "1000 120 34 73.23397092383432\n", + "************************************** 2 ***************************************\n", + "1000 1000 1 73.60809107572727\n", + "1000 1000 3 75.35113808488286\n", + "1000 1000 9 77.36513234831155\n", + "1000 1000 13 77.63069625639572\n", + "1000 1000 16 78.33312204631495\n", + "1000 1000 59 78.48170719275846\n" + ] + } + ], "source": [ - "# seed = 10\n", - "# actions = get_random_actions(env, seed)\n", - "# best_actions = find_best_actions(env, actions)" + "seed=0\n", + "rewards, actions = get_random_rewards(env, seed)\n", + "\n", + "new_r, new_act = explore_rewards(env, 50, actions, rewards)\n", + "# print(sum(new_r), sum(rewards))\n", + "best_actions, best_rewards = find_best_rewards(3, env, actions, rewards)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "98" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[1;32mc:\\Users\\stefano.giannini_ama\\Documents\\Python\\Learn\\data-science_projects\\Reinforcement Learning\\LunarLander\\lunarlander_random-sampling.ipynb Cell 5\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 12\u001b[0m step\u001b[39m+\u001b[39m\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m\n\u001b[0;32m 13\u001b[0m env\u001b[39m.\u001b[39mrender()\n\u001b[1;32m---> 14\u001b[0m time\u001b[39m.\u001b[39;49msleep(\u001b[39m0.2\u001b[39;49m)\n\u001b[0;32m 15\u001b[0m \u001b[39mif\u001b[39;00m terminated: \u001b[39mbreak\u001b[39;00m\u001b[39m#steps.append(step);break\u001b[39;00m\n\u001b[0;32m 17\u001b[0m env\u001b[39m.\u001b[39mclose()\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + ] } ], "source": [ - "actions = get_random_actions(env, seed)\n", - "len(actions)" + "import time\n", + "step=0\n", + "env = gym.make(\"LunarLander-v2\")\n", + "env.seed(seed)\n", + "env.reset()\n", + "\n", + "while True:\n", + " # print(ob.reshape(1, -1).shape)\n", + " action = best_actions[step]\n", + " # action = res[seeds[4]][step]\n", + " ob, reward, terminated, info = env.step(action)\n", + " step+=1\n", + " env.render()\n", + " time.sleep(0.05)\n", + " print(step)\n", + " if terminated: break#steps.append(step);break\n", + "\n", + "env.close()" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { - "ename": "KeyboardInterrupt", - "evalue": "", + "ename": "NameError", + "evalue": "name 'find_best_actions' is not defined", "output_type": "error", "traceback": [ 
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[1;32mc:\\Users\\stefano.giannini_ama\\Documents\\Python\\Learn\\data-science_projects\\Reinforcement Learning\\LunarLander\\lunarlander_random-sampling.ipynb Cell 6\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[39mfor\u001b[39;00m seed \u001b[39min\u001b[39;00m seeds[:\u001b[39m2\u001b[39m]:\n\u001b[0;32m 4\u001b[0m actions \u001b[39m=\u001b[39m get_random_actions(env, seed)\n\u001b[1;32m----> 5\u001b[0m best_actions \u001b[39m=\u001b[39m find_best_actions(env, actions)\n\u001b[0;32m 6\u001b[0m \u001b[39mprint\u001b[39m(\u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m[\u001b[39m\u001b[39m{\u001b[39;00mseed\u001b[39m}\u001b[39;00m\u001b[39m] Actions length improvement:\u001b[39m\u001b[39m\"\u001b[39m,\u001b[39mlen\u001b[39m(actions), \u001b[39m\"\u001b[39m\u001b[39m->\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mlen\u001b[39m(best_actions))\n\u001b[0;32m 7\u001b[0m res[seed] \u001b[39m=\u001b[39m best_actions\n", - "\u001b[1;32mc:\\Users\\stefano.giannini_ama\\Documents\\Python\\Learn\\data-science_projects\\Reinforcement Learning\\LunarLander\\lunarlander_random-sampling.ipynb Cell 6\u001b[0m in \u001b[0;36mfind_best_actions\u001b[1;34m(env, actions)\u001b[0m\n\u001b[0;32m 53\u001b[0m best_actions \u001b[39m=\u001b[39m []\n\u001b[0;32m 54\u001b[0m \u001b[39mwhile\u001b[39;00m change_node\u001b[39m<\u001b[39m\u001b[39mlen\u001b[39m(actions):\n\u001b[1;32m---> 55\u001b[0m new_actions \u001b[39m=\u001b[39m explore_actions(env, change_node, actions)\n\u001b[0;32m 56\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(new_actions)\u001b[39m>\u001b[39m\u001b[39mlen\u001b[39m(actions):\n\u001b[0;32m 57\u001b[0m \u001b[39m# print(len(new_actions), len(actions), change_node)\u001b[39;00m\n\u001b[0;32m 58\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mlen\u001b[39m(new_actions)\u001b[39m>\u001b[39m\u001b[39mlen\u001b[39m(best_actions):\n\u001b[0;32m 59\u001b[0m \u001b[39m# print(len(new_actions), len(actions), change_node)\u001b[39;00m\n", - "\u001b[1;32mc:\\Users\\stefano.giannini_ama\\Documents\\Python\\Learn\\data-science_projects\\Reinforcement Learning\\LunarLander\\lunarlander_random-sampling.ipynb Cell 6\u001b[0m in \u001b[0;36mexplore_actions\u001b[1;34m(env, change_node, actions)\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[39mfor\u001b[39;00m action \u001b[39min\u001b[39;00m actions[:\u001b[39m-\u001b[39mchange_node]:\n\u001b[0;32m 26\u001b[0m step\u001b[39m+\u001b[39m\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m\n\u001b[1;32m---> 27\u001b[0m ob, rewards, terminated, info \u001b[39m=\u001b[39m env\u001b[39m.\u001b[39;49mstep(action)\n\u001b[0;32m 28\u001b[0m new_actions\u001b[39m.\u001b[39mappend(action)\n\u001b[0;32m 29\u001b[0m \u001b[39mif\u001b[39;00m terminated: \u001b[39mbreak\u001b[39;00m\n", - "File \u001b[1;32mc:\\Users\\stefano.giannini_ama\\Anaconda3\\lib\\site-packages\\gym\\wrappers\\time_limit.py:18\u001b[0m, in \u001b[0;36mTimeLimit.step\u001b[1;34m(self, action)\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mstep\u001b[39m(\u001b[39mself\u001b[39m, action):\n\u001b[0;32m 15\u001b[0m \u001b[39massert\u001b[39;00m (\n\u001b[0;32m 16\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_elapsed_steps \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m 
\u001b[39mNone\u001b[39;00m\n\u001b[0;32m 17\u001b[0m ), \u001b[39m\"\u001b[39m\u001b[39mCannot call env.step() before calling reset()\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m---> 18\u001b[0m observation, reward, done, info \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49menv\u001b[39m.\u001b[39;49mstep(action)\n\u001b[0;32m 19\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_elapsed_steps \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m \u001b[39m1\u001b[39m\n\u001b[0;32m 20\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_elapsed_steps \u001b[39m>\u001b[39m\u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_max_episode_steps:\n", - "File \u001b[1;32mc:\\Users\\stefano.giannini_ama\\Anaconda3\\lib\\site-packages\\gym\\envs\\box2d\\lunar_lander.py:350\u001b[0m, in \u001b[0;36mLunarLander.step\u001b[1;34m(self, action)\u001b[0m\n\u001b[0;32m 346\u001b[0m pos \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlander\u001b[39m.\u001b[39mposition\n\u001b[0;32m 347\u001b[0m vel \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlander\u001b[39m.\u001b[39mlinearVelocity\n\u001b[0;32m 348\u001b[0m state \u001b[39m=\u001b[39m [\n\u001b[0;32m 349\u001b[0m (pos\u001b[39m.\u001b[39mx \u001b[39m-\u001b[39m VIEWPORT_W \u001b[39m/\u001b[39m SCALE \u001b[39m/\u001b[39m \u001b[39m2\u001b[39m) \u001b[39m/\u001b[39m (VIEWPORT_W \u001b[39m/\u001b[39m SCALE \u001b[39m/\u001b[39m \u001b[39m2\u001b[39m),\n\u001b[1;32m--> 350\u001b[0m (pos\u001b[39m.\u001b[39my \u001b[39m-\u001b[39m (\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39mhelipad_y \u001b[39m+\u001b[39m LEG_DOWN \u001b[39m/\u001b[39m SCALE)) \u001b[39m/\u001b[39m (VIEWPORT_H \u001b[39m/\u001b[39m SCALE \u001b[39m/\u001b[39m \u001b[39m2\u001b[39m),\n\u001b[0;32m 351\u001b[0m vel\u001b[39m.\u001b[39mx \u001b[39m*\u001b[39m (VIEWPORT_W \u001b[39m/\u001b[39m SCALE \u001b[39m/\u001b[39m \u001b[39m2\u001b[39m) \u001b[39m/\u001b[39m FPS,\n\u001b[0;32m 352\u001b[0m vel\u001b[39m.\u001b[39my \u001b[39m*\u001b[39m (VIEWPORT_H \u001b[39m/\u001b[39m SCALE \u001b[39m/\u001b[39m \u001b[39m2\u001b[39m) \u001b[39m/\u001b[39m FPS,\n\u001b[0;32m 353\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlander\u001b[39m.\u001b[39mangle,\n\u001b[0;32m 354\u001b[0m \u001b[39m20.0\u001b[39m \u001b[39m*\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlander\u001b[39m.\u001b[39mangularVelocity \u001b[39m/\u001b[39m FPS,\n\u001b[0;32m 355\u001b[0m \u001b[39m1.0\u001b[39m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlegs[\u001b[39m0\u001b[39m]\u001b[39m.\u001b[39mground_contact \u001b[39melse\u001b[39;00m \u001b[39m0.0\u001b[39m,\n\u001b[0;32m 356\u001b[0m \u001b[39m1.0\u001b[39m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlegs[\u001b[39m1\u001b[39m]\u001b[39m.\u001b[39mground_contact \u001b[39melse\u001b[39;00m \u001b[39m0.0\u001b[39m,\n\u001b[0;32m 357\u001b[0m ]\n\u001b[0;32m 358\u001b[0m \u001b[39massert\u001b[39;00m \u001b[39mlen\u001b[39m(state) \u001b[39m==\u001b[39m \u001b[39m8\u001b[39m\n\u001b[0;32m 360\u001b[0m reward \u001b[39m=\u001b[39m \u001b[39m0\u001b[39m\n", - "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + "\u001b[1;31mNameError\u001b[0m: name 'find_best_actions' is not defined" ] } ], @@ -179,9 +271,9 @@ "seeds = [0, 1, 5, 10, 21, 42, 47, 63, 84, 100, 121, 144]\n", "for seed in seeds[:2]:\n", " actions = get_random_actions(env, seed)\n", - " best_actions = find_best_actions(env, actions)\n", - " 
print(f\"[{seed}] Actions length improvement:\",len(actions), \"->\", len(best_actions))\n", - " res[seed] = best_actions" + " # best_actions = find_best_actions(env, actions)\n", + " # print(f\"[{seed}] Actions length improvement:\",len(actions), \"->\", len(best_actions))\n", + " # res[seed] = best_actions" ] }, {