From 319d2f5d547795b2b00d22969a016fe01e71c2cd Mon Sep 17 00:00:00 2001 From: Rodrigo de Salvo Braz Date: Thu, 15 Feb 2024 17:13:13 -0800 Subject: [PATCH] Add execution output to SIngle Item Recommender System tutorial Summary: Add execution output to SIngle Item Recommender System tutorial Reviewed By: yiwan-rl Differential Revision: D53793766 fbshipit-source-id: 46eb69db8b47367bafd3ce96ca77dbeac1ef299d --- .../single_item_recommender_system.ipynb | 10791 +++++++++++++++- 1 file changed, 10431 insertions(+), 360 deletions(-) diff --git a/tutorials/single_item_recommender_system_example/single_item_recommender_system.ipynb b/tutorials/single_item_recommender_system_example/single_item_recommender_system.ipynb index 8b681f6c..01b4aa1b 100644 --- a/tutorials/single_item_recommender_system_example/single_item_recommender_system.ipynb +++ b/tutorials/single_item_recommender_system_example/single_item_recommender_system.ipynb @@ -1,388 +1,10459 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Single Item Recommender System\n", - "\n", - "Here is a [better rendering](https://nbviewer.org/github/facebookresearch/Pearl/blob/main/tutorials/single_item_recommender_system_example/single_item_recommender_system.ipynb) of this notebook on [nbviewer](https://nbviewer.org/)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8NNfwWXGvn_o", - "output": { - "id": 383783884102102, - "loadingStatus": "loaded" - } - }, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Installation\n", - "If you haven't installed Pearl, please make sure you install Pearl with the following cell. Otherwise, you can skip the cell below." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1uLHbYlegKX-" - }, - "outputs": [], - "source": [ - "%pip uninstall Pearl -y\n", - "%rm -rf Pearl\n", - "!git clone https://github.com/facebookresearch/Pearl.git\n", - "%cd Pearl\n", - "%pip install .\n", - "%cd .." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Import Modules" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vcb70ZC_h3OA" - }, - "outputs": [], - "source": [ - "from pearl.neural_networks.sequential_decision_making.q_value_networks import EnsembleQValueNetwork\n", - "from pearl.replay_buffers.sequential_decision_making.bootstrap_replay_buffer import BootstrapReplayBuffer\n", - "from pearl.policy_learners.sequential_decision_making.bootstrapped_dqn import BootstrappedDQN\n", - "from pearl.utils.functional_utils.experimentation.set_seed import set_seed\n", - "from pearl.action_representation_modules.identity_action_representation_module import IdentityActionRepresentationModule\n", - "from pearl.history_summarization_modules.lstm_history_summarization_module import LSTMHistorySummarizationModule\n", - "from pearl.policy_learners.sequential_decision_making.deep_q_learning import DeepQLearning\n", - "from pearl.replay_buffers.sequential_decision_making.fifo_off_policy_replay_buffer import FIFOOffPolicyReplayBuffer\n", - "from pearl.utils.functional_utils.train_and_eval.online_learning import online_learning\n", - "from pearl.pearl_agent import PearlAgent\n", - "from pearl.tutorials.single_item_recommender_system_example.env_model import SequenceClassificationModel\n", - "from pearl.tutorials.single_item_recommender_system_example.env import RecEnv\n", - "import torch\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "set_seed(0)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load Environment\n", - "This environment's underlying model is 
trained using the MIND dataset (Wu et al. 2020).\n", - "\n", - "Each data point:\n", - "- A history of impressions clicked by a user\n", - "- Each impression is represented by an 100-dim vector\n", - "- A list of impressions and whether or not they are clicked\n", - "\n", - "The environment is constructed with the following setup. Note that this example is a contrived example to illustrate Pearl's usage, agent modularity and a subset of features. Not to represent a real-world environment or problem. \n", - "- State: a history of impressions by a user (note that we used the history of impressions of instead of clicked impressions to speed up learning in this example. Interested Pearl users can change it to history of clicked impressions with much longer episode length and samples to run the following experiments.)\n", - "- Dynamic action space: two randomly picked impressions\n", - "- Action: one of the two impressions\n", - "- Reward: click\n", - "- Reset every 20 steps.\n" - ] + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Single Item Recommender System\n", + "\n", + "Here is a [better rendering](https://nbviewer.org/github/facebookresearch/Pearl/blob/main/tutorials/single_item_recommender_system_example/single_item_recommender_system.ipynb) of this notebook on [nbviewer](https://nbviewer.org/)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "8NNfwWXGvn_o", + "output": { + "id": 383783884102102, + "loadingStatus": "loaded" }, + "vscode": { + "languageId": "python" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "g1VHtmldi3A2", - "output": { - "id": 1038395970722928, - "loadingStatus": "loaded" - } - }, - "outputs": [], - "source": [ - "# load environment\n", - "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", - "model = SequenceClassificationModel(100).to(device)\n", - "model.load_state_dict(torch.load(\"Pearl/pearl/tutorials/single_item_recommender_system_example/env_model_state_dict.pt\"))\n", - "actions = torch.load(\"Pearl/pearl/tutorials/single_item_recommender_system_example/news_embedding_small.pt\")\n", - "env = RecEnv(list(actions.values())[:100], model)\n", - "observation, action_space = env.reset()\n", - "\n", - "# experiment code\n", - "number_of_steps = 100000\n", - "record_period = 400" - ] - }, + "name": "stderr", + "output_type": "stream", + "text": [ + "I0213 175552.433 magics.py:84] %autoreload was called with parameter_s='2'\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Installation\n", + "If you haven't installed Pearl, please make sure you install Pearl with the following cell. Otherwise, you can skip the cell below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1uLHbYlegKX-", + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "%pip uninstall Pearl -y\n", + "%rm -rf Pearl\n", + "!git clone https://github.com/facebookresearch/Pearl.git\n", + "%cd Pearl\n", + "%pip install .\n", + "%cd .." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Import Modules" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "vcb70ZC_h3OA", + "vscode": { + "languageId": "python" + } + }, + "outputs": [ { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Vanilla DQN Agent\n", - "Able to handle dynamic action space but not able to handle partial observability and sparse reward." - ] + "name": "stderr", + "output_type": "stream", + "text": [ + "I0213 175559.072 box.py:24] Using 'gymnasium' package.\n", + "I0213 175559.074 box_action.py:25] Using 'gymnasium' package.\n", + "I0213 175559.077 discrete.py:22] Using 'gymnasium' package.\n", + "I0213 175559.079 discrete_action.py:25] Using 'gymnasium' package.\n" + ] + } + ], + "source": [ + "from pearl.neural_networks.sequential_decision_making.q_value_networks import EnsembleQValueNetwork\n", + "from pearl.replay_buffers.sequential_decision_making.bootstrap_replay_buffer import BootstrapReplayBuffer\n", + "from pearl.policy_learners.sequential_decision_making.bootstrapped_dqn import BootstrappedDQN\n", + "from pearl.utils.functional_utils.experimentation.set_seed import set_seed\n", + "from pearl.action_representation_modules.identity_action_representation_module import IdentityActionRepresentationModule\n", + "from pearl.history_summarization_modules.lstm_history_summarization_module import LSTMHistorySummarizationModule\n", + "from pearl.policy_learners.sequential_decision_making.deep_q_learning import DeepQLearning\n", + "from pearl.replay_buffers.sequential_decision_making.fifo_off_policy_replay_buffer import FIFOOffPolicyReplayBuffer\n", + "from pearl.utils.functional_utils.train_and_eval.online_learning import online_learning\n", + "from pearl.pearl_agent import PearlAgent\n", + "from pearl.tutorials.single_item_recommender_system_example.env_model import SequenceClassificationModel\n", + "from 
pearl.tutorials.single_item_recommender_system_example.env import RecEnv\n", + "import torch\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "set_seed(0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load Environment\n", + "This environment's underlying model is trained using the MIND dataset (Wu et al. 2020).\n", + "\n", + "Each data point:\n", + "- A history of impressions clicked by a user\n", + "- Each impression is represented by a 100-dim vector\n", + "- A list of impressions and whether or not they are clicked\n", + "\n", + "The environment is constructed with the following setup. Note that this example is a contrived example to illustrate Pearl's usage, agent modularity and a subset of features. It is not meant to represent a real-world environment or problem. \n", + "- State: a history of impressions by a user (note that we used the history of impressions instead of clicked impressions to speed up learning in this example. 
Interested Pearl users can change it to history of clicked impressions with much longer episode length and samples to run the following experiments.)\n", + "- Dynamic action space: two randomly picked impressions\n", + "- Action: one of the two impressions\n", + "- Reward: click\n", + "- Reset every 20 steps.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "g1VHtmldi3A2", + "output": { + "id": 1038395970722928, + "loadingStatus": "loaded" }, + "vscode": { + "languageId": "python" + } + }, + "outputs": [], + "source": [ + "# load environment\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "model = SequenceClassificationModel(100).to(device)\n", + "model.load_state_dict(torch.load(\"Pearl/tutorials/single_item_recommender_system_example/env_model_state_dict.pt\"))\n", + "actions = torch.load(\"Pearl/tutorials/single_item_recommender_system_example/news_embedding_small.pt\")\n", + "env = RecEnv(list(actions.values())[:100], model)\n", + "observation, action_space = env.reset()\n", + "\n", + "# experiment code\n", + "number_of_steps = 100000\n", + "record_period = 400" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Vanilla DQN Agent\n", + "Able to handle dynamic action space but not able to handle partial observability and sparse reward." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "kulkpFAvnOQx", + "vscode": { + "languageId": "python" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kulkpFAvnOQx" - }, - "outputs": [], - "source": [ - "# create a pearl agent\n", - "\n", - "action_representation_module = IdentityActionRepresentationModule(\n", - " max_number_actions=action_space.n,\n", - " representation_dim=action_space.action_dim,\n", - ")\n", - "\n", - "# DQN-vanilla\n", - "agent = PearlAgent(\n", - " policy_learner=DeepQLearning(\n", - " state_dim=1,\n", - " action_space=action_space,\n", - " hidden_dims=[64, 64],\n", - " training_rounds=50,\n", - " action_representation_module=action_representation_module,\n", - " ),\n", - " replay_buffer=FIFOOffPolicyReplayBuffer(100_000),\n", - " device_id=-1,\n", - ")\n", - "\n", - "info = online_learning(\n", - " agent=agent,\n", - " env=env,\n", - " number_of_steps=number_of_steps,\n", - " print_every_x_steps=100,\n", - " record_period=record_period,\n", - " learn_after_episode=True,\n", - ")\n", - "torch.save(info[\"return\"], \"DQN-return.pt\")\n", - "plt.plot(record_period * np.arange(len(info[\"return\"])), info[\"return\"], label=\"DQN\")\n", - "plt.legend()\n", - "plt.show()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "episode 5, step 100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 10, step 200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 15, step 300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 20, step 400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 25, step 500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 30, step 600, agent=PearlAgent with 
DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 35, step 700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 40, step 800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 45, step 900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 50, step 1000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 55, step 1100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 60, step 1200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 65, step 1300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 70, step 1400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 75, step 1500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 80, step 1600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 85, step 1700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 90, step 1800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 95, step 1900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 100, step 2000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 105, step 2100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 110, step 2200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 115, step 2300, 
agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 120, step 2400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 125, step 2500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 130, step 2600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 135, step 2700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 140, step 2800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 145, step 2900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 150, step 3000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 155, step 3100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 160, step 3200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 165, step 3300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 170, step 3400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 175, step 3500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 180, step 3600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 185, step 3700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 190, step 3800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 195, step 3900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + 
"episode 200, step 4000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 205, step 4100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 210, step 4200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 215, step 4300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 220, step 4400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 225, step 4500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 230, step 4600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 235, step 4700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 240, step 4800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 245, step 4900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 250, step 5000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 255, step 5100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 260, step 5200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 265, step 5300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 270, step 5400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 275, step 5500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 280, step 5600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, 
env=\n", + "return: 2.0\n", + "episode 285, step 5700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 290, step 5800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 295, step 5900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 300, step 6000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 305, step 6100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 310, step 6200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 315, step 6300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 320, step 6400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 325, step 6500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 330, step 6600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 335, step 6700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 340, step 6800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 345, step 6900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 350, step 7000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 355, step 7100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 360, step 7200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 365, step 7300, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 370, step 7400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 375, step 7500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 380, step 7600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 385, step 7700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 390, step 7800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 395, step 7900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 400, step 8000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 405, step 8100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 410, step 8200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 415, step 8300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 420, step 8400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 425, step 8500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 430, step 8600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 435, step 8700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 440, step 8800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 445, step 8900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 450, step 9000, 
agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 455, step 9100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 460, step 9200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 465, step 9300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 470, step 9400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 475, step 9500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 480, step 9600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 485, step 9700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 490, step 9800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 495, step 9900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 500, step 10000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 505, step 10100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 510, step 10200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 515, step 10300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 520, step 10400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 525, step 10500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 530, step 10600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 
1.0\n", + "episode 535, step 10700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 540, step 10800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 545, step 10900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 550, step 11000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 555, step 11100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 560, step 11200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 565, step 11300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 570, step 11400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 575, step 11500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 580, step 11600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 585, step 11700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 590, step 11800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 595, step 11900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 600, step 12000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 605, step 12100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 610, step 12200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 615, step 12300, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 620, step 12400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 625, step 12500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 630, step 12600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 635, step 12700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 640, step 12800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 645, step 12900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 650, step 13000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 655, step 13100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 660, step 13200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 665, step 13300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 670, step 13400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 675, step 13500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 680, step 13600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 685, step 13700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 690, step 13800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 695, step 13900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 700, step 
14000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 705, step 14100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 710, step 14200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 715, step 14300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 720, step 14400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 725, step 14500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 730, step 14600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 735, step 14700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 740, step 14800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 745, step 14900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 750, step 15000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 755, step 15100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 760, step 15200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 765, step 15300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 770, step 15400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 775, step 15500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 780, step 15600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", 
+ "return: 0.0\n", + "episode 785, step 15700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 790, step 15800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 795, step 15900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 800, step 16000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 805, step 16100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 810, step 16200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 815, step 16300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 820, step 16400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 825, step 16500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 830, step 16600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 835, step 16700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 840, step 16800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 845, step 16900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 850, step 17000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 855, step 17100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 860, step 17200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 865, step 17300, agent=PearlAgent with 
DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 870, step 17400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 875, step 17500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 880, step 17600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 885, step 17700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 890, step 17800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 895, step 17900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 900, step 18000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 905, step 18100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 910, step 18200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 915, step 18300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 920, step 18400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 925, step 18500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 930, step 18600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 935, step 18700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 940, step 18800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 945, step 18900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + 
"episode 950, step 19000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 955, step 19100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 960, step 19200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 965, step 19300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 970, step 19400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 975, step 19500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 980, step 19600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 985, step 19700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 990, step 19800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 995, step 19900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1000, step 20000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1005, step 20100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1010, step 20200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1015, step 20300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 1020, step 20400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1025, step 20500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1030, step 20600, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1035, step 20700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1040, step 20800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1045, step 20900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1050, step 21000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1055, step 21100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1060, step 21200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1065, step 21300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1070, step 21400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1075, step 21500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1080, step 21600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1085, step 21700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1090, step 21800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1095, step 21900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1100, step 22000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1105, step 22100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1110, step 22200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + 
"episode 1115, step 22300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1120, step 22400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1125, step 22500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1130, step 22600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1135, step 22700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1140, step 22800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1145, step 22900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1150, step 23000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1155, step 23100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1160, step 23200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1165, step 23300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1170, step 23400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1175, step 23500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1180, step 23600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1185, step 23700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1190, step 23800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1195, step 23900, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1200, step 24000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1205, step 24100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1210, step 24200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1215, step 24300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1220, step 24400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1225, step 24500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1230, step 24600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1235, step 24700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1240, step 24800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1245, step 24900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1250, step 25000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1255, step 25100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1260, step 25200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1265, step 25300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1270, step 25400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1275, step 25500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + 
"episode 1280, step 25600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1285, step 25700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1290, step 25800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1295, step 25900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1300, step 26000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1305, step 26100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1310, step 26200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1315, step 26300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1320, step 26400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1325, step 26500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1330, step 26600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1335, step 26700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1340, step 26800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1345, step 26900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1350, step 27000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1355, step 27100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1360, step 27200, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1365, step 27300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1370, step 27400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1375, step 27500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1380, step 27600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1385, step 27700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1390, step 27800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1395, step 27900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1400, step 28000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1405, step 28100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1410, step 28200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1415, step 28300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1420, step 28400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1425, step 28500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1430, step 28600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1435, step 28700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1440, step 28800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + 
"episode 1445, step 28900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1450, step 29000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1455, step 29100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1460, step 29200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1465, step 29300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1470, step 29400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1475, step 29500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1480, step 29600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1485, step 29700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1490, step 29800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1495, step 29900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1500, step 30000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1505, step 30100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1510, step 30200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1515, step 30300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1520, step 30400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1525, step 30500, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1530, step 30600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1535, step 30700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1540, step 30800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1545, step 30900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1550, step 31000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1555, step 31100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1560, step 31200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1565, step 31300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1570, step 31400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1575, step 31500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1580, step 31600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1585, step 31700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1590, step 31800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1595, step 31900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1600, step 32000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1605, step 32100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + 
"episode 1610, step 32200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1615, step 32300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1620, step 32400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1625, step 32500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1630, step 32600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1635, step 32700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1640, step 32800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1645, step 32900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1650, step 33000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1655, step 33100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1660, step 33200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1665, step 33300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1670, step 33400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1675, step 33500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1680, step 33600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1685, step 33700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1690, step 33800, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1695, step 33900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1700, step 34000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1705, step 34100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1710, step 34200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1715, step 34300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1720, step 34400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1725, step 34500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1730, step 34600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1735, step 34700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1740, step 34800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1745, step 34900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1750, step 35000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1755, step 35100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1760, step 35200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1765, step 35300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1770, step 35400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + 
"episode 1775, step 35500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1780, step 35600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1785, step 35700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1790, step 35800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1795, step 35900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1800, step 36000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1805, step 36100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1810, step 36200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1815, step 36300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1820, step 36400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1825, step 36500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1830, step 36600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1835, step 36700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1840, step 36800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1845, step 36900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1850, step 37000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1855, step 37100, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1860, step 37200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1865, step 37300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1870, step 37400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1875, step 37500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1880, step 37600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1885, step 37700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1890, step 37800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1895, step 37900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1900, step 38000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1905, step 38100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1910, step 38200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1915, step 38300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1920, step 38400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1925, step 38500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1930, step 38600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1935, step 38700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + 
"episode 1940, step 38800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1945, step 38900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1950, step 39000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1955, step 39100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1960, step 39200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1965, step 39300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1970, step 39400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1975, step 39500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1980, step 39600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1985, step 39700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1990, step 39800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1995, step 39900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2000, step 40000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2005, step 40100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2010, step 40200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2015, step 40300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2020, step 40400, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2025, step 40500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2030, step 40600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2035, step 40700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2040, step 40800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2045, step 40900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2050, step 41000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2055, step 41100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2060, step 41200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2065, step 41300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2070, step 41400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2075, step 41500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2080, step 41600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2085, step 41700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 7.0\n", + "episode 2090, step 41800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2095, step 41900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2100, step 42000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + 
"episode 2105, step 42100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2110, step 42200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2115, step 42300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2120, step 42400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2125, step 42500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2130, step 42600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2135, step 42700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2140, step 42800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2145, step 42900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2150, step 43000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2155, step 43100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2160, step 43200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2165, step 43300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2170, step 43400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2175, step 43500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2180, step 43600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2185, step 43700, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2190, step 43800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2195, step 43900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2200, step 44000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2205, step 44100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2210, step 44200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2215, step 44300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2220, step 44400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2225, step 44500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2230, step 44600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2235, step 44700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2240, step 44800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2245, step 44900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2250, step 45000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2255, step 45100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2260, step 45200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2265, step 45300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + 
"episode 2270, step 45400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2275, step 45500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2280, step 45600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2285, step 45700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2290, step 45800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2295, step 45900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2300, step 46000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2305, step 46100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2310, step 46200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2315, step 46300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2320, step 46400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2325, step 46500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2330, step 46600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2335, step 46700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2340, step 46800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2345, step 46900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2350, step 47000, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2355, step 47100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2360, step 47200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2365, step 47300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2370, step 47400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2375, step 47500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2380, step 47600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2385, step 47700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2390, step 47800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2395, step 47900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2400, step 48000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2405, step 48100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2410, step 48200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2415, step 48300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2420, step 48400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2425, step 48500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2430, step 48600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + 
"episode 2435, step 48700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2440, step 48800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2445, step 48900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2450, step 49000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2455, step 49100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2460, step 49200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2465, step 49300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2470, step 49400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2475, step 49500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2480, step 49600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2485, step 49700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2490, step 49800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2495, step 49900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2500, step 50000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2505, step 50100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2510, step 50200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2515, step 50300, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2520, step 50400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2525, step 50500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2530, step 50600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2535, step 50700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2540, step 50800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2545, step 50900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2550, step 51000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2555, step 51100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2560, step 51200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2565, step 51300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2570, step 51400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2575, step 51500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2580, step 51600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2585, step 51700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2590, step 51800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2595, step 51900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + 
"episode 2600, step 52000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2605, step 52100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2610, step 52200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2615, step 52300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2620, step 52400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2625, step 52500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2630, step 52600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2635, step 52700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2640, step 52800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2645, step 52900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2650, step 53000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2655, step 53100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2660, step 53200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2665, step 53300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2670, step 53400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2675, step 53500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2680, step 53600, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2685, step 53700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2690, step 53800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2695, step 53900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2700, step 54000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2705, step 54100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2710, step 54200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2715, step 54300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2720, step 54400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2725, step 54500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2730, step 54600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2735, step 54700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2740, step 54800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2745, step 54900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2750, step 55000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2755, step 55100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2760, step 55200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + 
"episode 2765, step 55300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2770, step 55400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2775, step 55500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2780, step 55600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2785, step 55700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2790, step 55800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2795, step 55900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2800, step 56000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2805, step 56100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2810, step 56200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2815, step 56300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2820, step 56400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2825, step 56500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2830, step 56600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2835, step 56700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2840, step 56800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2845, step 56900, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2850, step 57000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2855, step 57100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2860, step 57200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2865, step 57300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2870, step 57400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2875, step 57500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2880, step 57600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2885, step 57700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2890, step 57800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2895, step 57900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2900, step 58000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2905, step 58100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2910, step 58200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2915, step 58300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2920, step 58400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2925, step 58500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + 
"episode 2930, step 58600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2935, step 58700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2940, step 58800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2945, step 58900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2950, step 59000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2955, step 59100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2960, step 59200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2965, step 59300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2970, step 59400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2975, step 59500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2980, step 59600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2985, step 59700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2990, step 59800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2995, step 59900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3000, step 60000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3005, step 60100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3010, step 60200, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3015, step 60300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3020, step 60400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3025, step 60500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3030, step 60600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3035, step 60700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3040, step 60800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3045, step 60900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3050, step 61000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3055, step 61100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3060, step 61200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3065, step 61300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3070, step 61400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3075, step 61500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3080, step 61600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3085, step 61700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3090, step 61800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + 
"episode 3095, step 61900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3100, step 62000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3105, step 62100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3110, step 62200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3115, step 62300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3120, step 62400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3125, step 62500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3130, step 62600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3135, step 62700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3140, step 62800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3145, step 62900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3150, step 63000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3155, step 63100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3160, step 63200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3165, step 63300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3170, step 63400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3175, step 63500, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3180, step 63600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3185, step 63700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3190, step 63800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3195, step 63900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3200, step 64000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3205, step 64100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3210, step 64200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3215, step 64300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3220, step 64400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3225, step 64500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3230, step 64600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3235, step 64700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3240, step 64800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3245, step 64900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3250, step 65000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3255, step 65100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + 
"episode 3260, step 65200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3265, step 65300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3270, step 65400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3275, step 65500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3280, step 65600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3285, step 65700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3290, step 65800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3295, step 65900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3300, step 66000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3305, step 66100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3310, step 66200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3315, step 66300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3320, step 66400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3325, step 66500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3330, step 66600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3335, step 66700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3340, step 66800, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3345, step 66900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3350, step 67000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3355, step 67100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3360, step 67200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3365, step 67300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3370, step 67400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3375, step 67500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3380, step 67600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3385, step 67700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3390, step 67800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3395, step 67900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3400, step 68000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3405, step 68100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3410, step 68200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3415, step 68300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3420, step 68400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + 
"episode 3425, step 68500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3430, step 68600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3435, step 68700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3440, step 68800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3445, step 68900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3450, step 69000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3455, step 69100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3460, step 69200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3465, step 69300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3470, step 69400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3475, step 69500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3480, step 69600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3485, step 69700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3490, step 69800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3495, step 69900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3500, step 70000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3505, step 70100, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3510, step 70200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3515, step 70300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3520, step 70400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3525, step 70500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3530, step 70600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3535, step 70700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3540, step 70800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3545, step 70900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3550, step 71000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3555, step 71100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3560, step 71200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3565, step 71300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3570, step 71400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3575, step 71500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3580, step 71600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3585, step 71700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + 
"episode 3590, step 71800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3595, step 71900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3600, step 72000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3605, step 72100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3610, step 72200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3615, step 72300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3620, step 72400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3625, step 72500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3630, step 72600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3635, step 72700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3640, step 72800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3645, step 72900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3650, step 73000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3655, step 73100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3660, step 73200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3665, step 73300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3670, step 73400, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3675, step 73500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3680, step 73600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3685, step 73700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3690, step 73800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3695, step 73900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3700, step 74000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3705, step 74100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3710, step 74200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3715, step 74300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3720, step 74400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3725, step 74500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3730, step 74600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3735, step 74700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3740, step 74800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3745, step 74900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3750, step 75000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + 
"episode 3755, step 75100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3760, step 75200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3765, step 75300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3770, step 75400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3775, step 75500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3780, step 75600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3785, step 75700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3790, step 75800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3795, step 75900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 3800, step 76000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3805, step 76100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3810, step 76200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3815, step 76300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3820, step 76400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3825, step 76500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3830, step 76600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 3835, step 76700, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3840, step 76800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3845, step 76900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3850, step 77000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3855, step 77100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3860, step 77200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3865, step 77300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3870, step 77400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3875, step 77500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3880, step 77600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3885, step 77700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3890, step 77800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3895, step 77900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3900, step 78000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3905, step 78100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3910, step 78200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3915, step 78300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + 
"episode 3920, step 78400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3925, step 78500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3930, step 78600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3935, step 78700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3940, step 78800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3945, step 78900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3950, step 79000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 3955, step 79100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3960, step 79200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3965, step 79300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3970, step 79400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3975, step 79500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3980, step 79600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3985, step 79700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3990, step 79800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3995, step 79900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4000, step 80000, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4005, step 80100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4010, step 80200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4015, step 80300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4020, step 80400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4025, step 80500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4030, step 80600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4035, step 80700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4040, step 80800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4045, step 80900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4050, step 81000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4055, step 81100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4060, step 81200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4065, step 81300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4070, step 81400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4075, step 81500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4080, step 81600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + 
"episode 4085, step 81700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4090, step 81800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4095, step 81900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4100, step 82000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4105, step 82100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4110, step 82200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4115, step 82300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4120, step 82400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4125, step 82500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4130, step 82600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4135, step 82700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4140, step 82800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4145, step 82900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4150, step 83000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4155, step 83100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4160, step 83200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4165, step 83300, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4170, step 83400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4175, step 83500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4180, step 83600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4185, step 83700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4190, step 83800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4195, step 83900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4200, step 84000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4205, step 84100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4210, step 84200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4215, step 84300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4220, step 84400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4225, step 84500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4230, step 84600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4235, step 84700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4240, step 84800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4245, step 84900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + 
"episode 4250, step 85000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4255, step 85100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4260, step 85200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4265, step 85300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4270, step 85400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4275, step 85500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4280, step 85600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4285, step 85700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4290, step 85800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4295, step 85900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4300, step 86000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4305, step 86100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4310, step 86200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4315, step 86300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4320, step 86400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4325, step 86500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4330, step 86600, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4335, step 86700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4340, step 86800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4345, step 86900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4350, step 87000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4355, step 87100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4360, step 87200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4365, step 87300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4370, step 87400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4375, step 87500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4380, step 87600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4385, step 87700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4390, step 87800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4395, step 87900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4400, step 88000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4405, step 88100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4410, step 88200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + 
"episode 4415, step 88300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4420, step 88400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4425, step 88500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4430, step 88600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4435, step 88700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4440, step 88800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4445, step 88900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4450, step 89000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4455, step 89100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4460, step 89200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4465, step 89300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4470, step 89400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4475, step 89500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4480, step 89600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4485, step 89700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4490, step 89800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4495, step 89900, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4500, step 90000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4505, step 90100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4510, step 90200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4515, step 90300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4520, step 90400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4525, step 90500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4530, step 90600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4535, step 90700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4540, step 90800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4545, step 90900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4550, step 91000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4555, step 91100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4560, step 91200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4565, step 91300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4570, step 91400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4575, step 91500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + 
"episode 4580, step 91600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4585, step 91700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4590, step 91800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4595, step 91900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4600, step 92000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4605, step 92100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4610, step 92200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4615, step 92300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4620, step 92400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4625, step 92500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4630, step 92600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4635, step 92700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4640, step 92800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4645, step 92900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4650, step 93000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4655, step 93100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4660, step 93200, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4665, step 93300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4670, step 93400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4675, step 93500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4680, step 93600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4685, step 93700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4690, step 93800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4695, step 93900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4700, step 94000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4705, step 94100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4710, step 94200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4715, step 94300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4720, step 94400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4725, step 94500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4730, step 94600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4735, step 94700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4740, step 94800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + 
"episode 4745, step 94900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4750, step 95000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4755, step 95100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4760, step 95200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4765, step 95300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4770, step 95400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4775, step 95500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4780, step 95600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4785, step 95700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4790, step 95800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4795, step 95900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4800, step 96000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4805, step 96100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 4810, step 96200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4815, step 96300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4820, step 96400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4825, step 96500, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4830, step 96600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4835, step 96700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4840, step 96800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4845, step 96900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4850, step 97000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4855, step 97100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4860, step 97200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4865, step 97300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4870, step 97400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4875, step 97500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4880, step 97600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4885, step 97700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4890, step 97800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4895, step 97900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4900, step 98000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4905, step 98100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + 
"episode 4910, step 98200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4915, step 98300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4920, step 98400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4925, step 98500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4930, step 98600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4935, step 98700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4940, step 98800, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4945, step 98900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4950, step 99000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4955, step 99100, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4960, step 99200, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4965, step 99300, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4970, step 99400, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4975, step 99500, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4980, step 99600, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4985, step 99700, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4990, step 99800, agent=PearlAgent with DeepQLearning, 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4995, step 99900, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 5000, step 100000, agent=PearlAgent with DeepQLearning, FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n" + ] }, { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## DQN Agent with LSTM history summarization module\n", - "\n", - "Now the DQN agent can handle partially observable environments with history summarization" + "data": { + "image/png": "", + "text/plain": [ + "
" ] - }, + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# create a pearl agent\n", + "\n", + "action_representation_module = IdentityActionRepresentationModule(\n", + " max_number_actions=action_space.n,\n", + " representation_dim=action_space.action_dim,\n", + ")\n", + "\n", + "# DQN-vanilla\n", + "agent = PearlAgent(\n", + " policy_learner=DeepQLearning(\n", + " state_dim=1,\n", + " action_space=action_space,\n", + " hidden_dims=[64, 64],\n", + " training_rounds=50,\n", + " action_representation_module=action_representation_module,\n", + " ),\n", + " replay_buffer=FIFOOffPolicyReplayBuffer(100_000),\n", + " device_id=-1,\n", + ")\n", + "\n", + "info = online_learning(\n", + " agent=agent,\n", + " env=env,\n", + " number_of_steps=number_of_steps,\n", + " print_every_x_steps=100,\n", + " record_period=record_period,\n", + " learn_after_episode=True,\n", + ")\n", + "torch.save(info[\"return\"], \"DQN-return.pt\")\n", + "plt.plot(record_period * np.arange(len(info[\"return\"])), info[\"return\"], label=\"DQN\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## DQN Agent with LSTM history summarization module\n", + "\n", + "Now the DQN agent can handle partially observable environments with history summarization" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "cDauzO74nS4c", + "vscode": { + "languageId": "python" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cDauzO74nS4c" - }, - "outputs": [], - "source": [ - "# Add a LSTM history summarization module\n", - "\n", - "agent = PearlAgent(\n", - " policy_learner=DeepQLearning(\n", - " state_dim=128,\n", - " action_space=action_space,\n", - " hidden_dims=[64, 64],\n", - " training_rounds=50,\n", - " action_representation_module=action_representation_module,\n", - " ),\n", - " 
history_summarization_module=LSTMHistorySummarizationModule(\n", - " observation_dim=1,\n", - " action_dim=100,\n", - " hidden_dim=128,\n", - " history_length=8,\n", - " ),\n", - " replay_buffer=FIFOOffPolicyReplayBuffer(100_000),\n", - " device_id=-1,\n", - ")\n", - "\n", - "info = online_learning(\n", - " agent=agent,\n", - " env=env,\n", - " number_of_steps=number_of_steps,\n", - " print_every_x_steps=100,\n", - " record_period=record_period,\n", - " learn_after_episode=True,\n", - ")\n", - "torch.save(info[\"return\"], \"DQN-LSTM-return.pt\")\n", - "plt.plot(record_period * np.arange(len(info[\"return\"])), info[\"return\"], label=\"DQN-LSTM\")\n", - "plt.legend()\n", - "plt.show()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "episode 5, step 100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 10, step 200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 15, step 300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 20, step 400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 25, step 500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 30, step 600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 35, step 700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 40, step 800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 45, step 900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 50, step 1000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 55, step 1100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 60, step 1200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 65, step 1300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 70, step 1400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 75, step 1500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 
128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 80, step 1600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 85, step 1700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 90, step 1800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 95, step 1900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 100, step 2000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 105, step 2100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 110, step 2200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 115, step 2300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 120, step 2400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " 
(lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 125, step 2500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 130, step 2600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 135, step 2700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 140, step 2800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 145, step 2900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 150, step 3000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 155, step 3100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 160, step 3200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 165, step 3300, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 170, step 3400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 175, step 3500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 180, step 3600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 185, step 3700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 190, step 3800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 195, step 3900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 200, step 4000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 205, step 4100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 210, step 4200, agent=PearlAgent 
with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 215, step 4300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 220, step 4400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 225, step 4500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 230, step 4600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 235, step 4700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 240, step 4800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 245, step 4900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 250, step 5000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 255, step 5100, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 260, step 5200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 265, step 5300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 270, step 5400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 275, step 5500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 280, step 5600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 285, step 5700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 290, step 5800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 295, step 5900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 
300, step 6000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 305, step 6100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 310, step 6200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 315, step 6300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 320, step 6400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 325, step 6500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 330, step 6600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 335, step 6700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 340, step 6800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
2.0\n", + "episode 345, step 6900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 350, step 7000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 355, step 7100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 360, step 7200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 365, step 7300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 370, step 7400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 375, step 7500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 380, step 7600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 385, step 7700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, 
env=\n", + "return: 3.0\n", + "episode 390, step 7800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 395, step 7900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 400, step 8000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 405, step 8100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 410, step 8200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 415, step 8300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 420, step 8400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 425, step 8500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 430, step 8600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 435, step 8700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 440, step 8800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 445, step 8900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 450, step 9000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 455, step 9100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 460, step 9200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 465, step 9300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 470, step 9400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 475, step 9500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, 
batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 480, step 9600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 485, step 9700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 490, step 9800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 495, step 9900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 500, step 10000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 505, step 10100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 510, step 10200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 515, step 10300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 520, step 10400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): 
LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 525, step 10500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 530, step 10600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 535, step 10700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 540, step 10800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 545, step 10900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 550, step 11000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 555, step 11100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 560, step 11200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 565, step 11300, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 570, step 11400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 575, step 11500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 580, step 11600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 585, step 11700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 590, step 11800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 595, step 11900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 600, step 12000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 605, step 12100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 610, step 12200, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 615, step 12300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 620, step 12400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 625, step 12500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 630, step 12600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 635, step 12700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 640, step 12800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 645, step 12900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 650, step 13000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + 
"episode 655, step 13100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 660, step 13200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 665, step 13300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 670, step 13400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 675, step 13500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 680, step 13600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 685, step 13700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 690, step 13800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 695, step 13900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, 
env=\n", + "return: 2.0\n", + "episode 700, step 14000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 705, step 14100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 710, step 14200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 715, step 14300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 720, step 14400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 725, step 14500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 730, step 14600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 735, step 14700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 740, step 14800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 745, step 14900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 750, step 15000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 755, step 15100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 760, step 15200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 765, step 15300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 770, step 15400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 775, step 15500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 780, step 15600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 785, step 15700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, 
batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 790, step 15800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 795, step 15900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 800, step 16000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 805, step 16100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 810, step 16200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 815, step 16300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 820, step 16400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 825, step 16500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 830, step 16600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): 
LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 835, step 16700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 840, step 16800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 845, step 16900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 850, step 17000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 855, step 17100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 860, step 17200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 865, step 17300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 870, step 17400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 875, step 17500, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 880, step 17600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 885, step 17700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 890, step 17800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 895, step 17900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 900, step 18000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 905, step 18100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 910, step 18200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 915, step 18300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 920, step 18400, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 925, step 18500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 930, step 18600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 935, step 18700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 940, step 18800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 945, step 18900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 950, step 19000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 955, step 19100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 960, step 19200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + 
"episode 965, step 19300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 970, step 19400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 975, step 19500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 980, step 19600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 985, step 19700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 990, step 19800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 995, step 19900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1000, step 20000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1005, step 20100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, 
env=\n", + "return: 6.0\n", + "episode 1010, step 20200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1015, step 20300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1020, step 20400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1025, step 20500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1030, step 20600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1035, step 20700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1040, step 20800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1045, step 20900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1050, step 21000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, 
batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1055, step 21100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1060, step 21200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1065, step 21300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1070, step 21400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1075, step 21500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1080, step 21600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1085, step 21700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1090, step 21800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1095, step 21900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " 
(lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1100, step 22000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1105, step 22100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1110, step 22200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1115, step 22300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1120, step 22400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1125, step 22500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1130, step 22600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1135, step 22700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1140, step 22800, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1145, step 22900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1150, step 23000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1155, step 23100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1160, step 23200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1165, step 23300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1170, step 23400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1175, step 23500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1180, step 23600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1185, step 23700, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1190, step 23800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1195, step 23900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1200, step 24000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1205, step 24100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1210, step 24200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1215, step 24300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1220, step 24400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1225, step 24500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
1.0\n", + "episode 1230, step 24600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1235, step 24700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1240, step 24800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1245, step 24900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1250, step 25000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1255, step 25100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1260, step 25200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1265, step 25300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1270, step 25400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1275, step 25500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1280, step 25600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1285, step 25700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1290, step 25800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1295, step 25900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 1300, step 26000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1305, step 26100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1310, step 26200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1315, step 26300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1320, step 26400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1325, step 26500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1330, step 26600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1335, step 26700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1340, step 26800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1345, step 26900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1350, step 27000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1355, step 27100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1360, step 27200, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1365, step 27300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1370, step 27400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1375, step 27500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1380, step 27600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 1385, step 27700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1390, step 27800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1395, step 27900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1400, step 28000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1405, step 28100, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1410, step 28200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1415, step 28300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1420, step 28400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1425, step 28500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1430, step 28600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1435, step 28700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1440, step 28800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1445, step 28900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
3.0\n", + "episode 1450, step 29000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1455, step 29100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1460, step 29200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1465, step 29300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1470, step 29400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1475, step 29500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1480, step 29600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1485, step 29700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1490, step 29800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1495, step 29900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1500, step 30000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1505, step 30100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1510, step 30200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1515, step 30300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1520, step 30400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1525, step 30500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1530, step 30600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1535, step 30700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1540, step 30800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1545, step 30900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1550, step 31000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1555, step 31100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1560, step 31200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1565, step 31300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1570, step 31400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1575, step 31500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1580, step 31600, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1585, step 31700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1590, step 31800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1595, step 31900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1600, step 32000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1605, step 32100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1610, step 32200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1615, step 32300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1620, step 32400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1625, step 32500, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1630, step 32600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1635, step 32700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1640, step 32800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1645, step 32900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1650, step 33000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1655, step 33100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1660, step 33200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1665, step 33300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
2.0\n", + "episode 1670, step 33400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1675, step 33500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1680, step 33600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1685, step 33700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1690, step 33800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1695, step 33900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1700, step 34000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1705, step 34100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1710, step 34200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1715, step 34300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1720, step 34400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1725, step 34500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1730, step 34600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1735, step 34700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1740, step 34800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1745, step 34900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1750, step 35000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1755, step 35100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1760, step 35200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1765, step 35300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1770, step 35400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1775, step 35500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1780, step 35600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1785, step 35700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1790, step 35800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1795, step 35900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1800, step 36000, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1805, step 36100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1810, step 36200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1815, step 36300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1820, step 36400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1825, step 36500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1830, step 36600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1835, step 36700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1840, step 36800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 1845, step 36900, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1850, step 37000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1855, step 37100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1860, step 37200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1865, step 37300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1870, step 37400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1875, step 37500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1880, step 37600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1885, step 37700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
2.0\n", + "episode 1890, step 37800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1895, step 37900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1900, step 38000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1905, step 38100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1910, step 38200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1915, step 38300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1920, step 38400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1925, step 38500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1930, step 38600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1935, step 38700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1940, step 38800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1945, step 38900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1950, step 39000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1955, step 39100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1960, step 39200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1965, step 39300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1970, step 39400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1975, step 39500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1980, step 39600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1985, step 39700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1990, step 39800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1995, step 39900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2000, step 40000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2005, step 40100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2010, step 40200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2015, step 40300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2020, step 40400, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2025, step 40500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2030, step 40600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2035, step 40700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2040, step 40800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2045, step 40900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2050, step 41000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2055, step 41100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2060, step 41200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2065, step 41300, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2070, step 41400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2075, step 41500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2080, step 41600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2085, step 41700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2090, step 41800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2095, step 41900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2100, step 42000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2105, step 42100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
0.0\n", + "episode 2110, step 42200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2115, step 42300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2120, step 42400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2125, step 42500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2130, step 42600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2135, step 42700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2140, step 42800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2145, step 42900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2150, step 43000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2155, step 43100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2160, step 43200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2165, step 43300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2170, step 43400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2175, step 43500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2180, step 43600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2185, step 43700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2190, step 43800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2195, step 43900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2200, step 44000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2205, step 44100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2210, step 44200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2215, step 44300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2220, step 44400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2225, step 44500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2230, step 44600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2235, step 44700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2240, step 44800, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2245, step 44900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2250, step 45000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2255, step 45100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2260, step 45200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2265, step 45300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2270, step 45400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2275, step 45500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2280, step 45600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2285, step 45700, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 2290, step 45800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2295, step 45900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2300, step 46000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2305, step 46100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2310, step 46200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2315, step 46300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2320, step 46400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2325, step 46500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
7.0\n", + "episode 2330, step 46600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2335, step 46700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2340, step 46800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2345, step 46900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2350, step 47000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2355, step 47100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2360, step 47200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2365, step 47300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2370, step 47400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2375, step 47500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2380, step 47600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2385, step 47700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2390, step 47800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2395, step 47900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2400, step 48000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2405, step 48100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2410, step 48200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2415, step 48300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2420, step 48400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2425, step 48500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2430, step 48600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2435, step 48700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2440, step 48800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2445, step 48900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2450, step 49000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2455, step 49100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2460, step 49200, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2465, step 49300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2470, step 49400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2475, step 49500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2480, step 49600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2485, step 49700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2490, step 49800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2495, step 49900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2500, step 50000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2505, step 50100, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2510, step 50200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2515, step 50300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2520, step 50400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2525, step 50500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2530, step 50600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2535, step 50700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2540, step 50800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2545, step 50900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
2.0\n", + "episode 2550, step 51000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2555, step 51100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2560, step 51200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2565, step 51300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2570, step 51400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2575, step 51500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2580, step 51600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 2585, step 51700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2590, step 51800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2595, step 51900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 2600, step 52000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2605, step 52100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2610, step 52200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2615, step 52300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2620, step 52400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2625, step 52500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2630, step 52600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2635, step 52700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2640, step 52800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2645, step 52900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2650, step 53000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2655, step 53100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2660, step 53200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2665, step 53300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2670, step 53400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2675, step 53500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2680, step 53600, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2685, step 53700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2690, step 53800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2695, step 53900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2700, step 54000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2705, step 54100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2710, step 54200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2715, step 54300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2720, step 54400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2725, step 54500, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 7.0\n", + "episode 2730, step 54600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2735, step 54700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2740, step 54800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2745, step 54900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2750, step 55000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2755, step 55100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2760, step 55200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2765, step 55300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
5.0\n", + "episode 2770, step 55400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2775, step 55500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2780, step 55600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2785, step 55700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2790, step 55800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2795, step 55900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2800, step 56000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2805, step 56100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2810, step 56200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2815, step 56300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2820, step 56400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2825, step 56500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2830, step 56600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2835, step 56700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2840, step 56800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2845, step 56900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2850, step 57000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2855, step 57100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2860, step 57200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2865, step 57300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2870, step 57400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2875, step 57500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 7.0\n", + "episode 2880, step 57600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2885, step 57700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2890, step 57800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2895, step 57900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2900, step 58000, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2905, step 58100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2910, step 58200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2915, step 58300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2920, step 58400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2925, step 58500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2930, step 58600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2935, step 58700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2940, step 58800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2945, step 58900, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2950, step 59000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2955, step 59100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2960, step 59200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2965, step 59300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2970, step 59400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2975, step 59500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2980, step 59600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2985, step 59700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
1.0\n", + "episode 2990, step 59800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2995, step 59900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3000, step 60000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3005, step 60100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3010, step 60200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3015, step 60300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3020, step 60400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3025, step 60500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3030, step 60600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3035, step 60700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3040, step 60800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3045, step 60900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3050, step 61000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3055, step 61100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3060, step 61200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3065, step 61300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3070, step 61400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3075, step 61500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3080, step 61600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3085, step 61700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3090, step 61800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3095, step 61900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3100, step 62000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3105, step 62100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 7.0\n", + "episode 3110, step 62200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3115, step 62300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3120, step 62400, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3125, step 62500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3130, step 62600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3135, step 62700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3140, step 62800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3145, step 62900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3150, step 63000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3155, step 63100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3160, step 63200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3165, step 63300, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3170, step 63400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3175, step 63500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3180, step 63600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3185, step 63700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3190, step 63800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3195, step 63900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3200, step 64000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3205, step 64100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
2.0\n", + "episode 3210, step 64200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3215, step 64300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3220, step 64400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3225, step 64500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3230, step 64600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3235, step 64700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3240, step 64800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3245, step 64900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3250, step 65000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 3255, step 65100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3260, step 65200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3265, step 65300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3270, step 65400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3275, step 65500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3280, step 65600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3285, step 65700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3290, step 65800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3295, step 65900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3300, step 66000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3305, step 66100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3310, step 66200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3315, step 66300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3320, step 66400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3325, step 66500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3330, step 66600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3335, step 66700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3340, step 66800, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3345, step 66900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3350, step 67000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3355, step 67100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3360, step 67200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3365, step 67300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3370, step 67400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3375, step 67500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3380, step 67600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3385, step 67700, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3390, step 67800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3395, step 67900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3400, step 68000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3405, step 68100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 3410, step 68200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3415, step 68300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3420, step 68400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3425, step 68500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
2.0\n", + "episode 3430, step 68600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3435, step 68700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3440, step 68800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 3445, step 68900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3450, step 69000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3455, step 69100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3460, step 69200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3465, step 69300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3470, step 69400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3475, step 69500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3480, step 69600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3485, step 69700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3490, step 69800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3495, step 69900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3500, step 70000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3505, step 70100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3510, step 70200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3515, step 70300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3520, step 70400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3525, step 70500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3530, step 70600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3535, step 70700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3540, step 70800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3545, step 70900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3550, step 71000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3555, step 71100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3560, step 71200, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3565, step 71300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3570, step 71400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3575, step 71500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3580, step 71600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3585, step 71700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3590, step 71800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3595, step 71900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3600, step 72000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3605, step 72100, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3610, step 72200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 3615, step 72300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3620, step 72400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3625, step 72500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3630, step 72600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3635, step 72700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3640, step 72800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3645, step 72900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
2.0\n", + "episode 3650, step 73000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3655, step 73100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3660, step 73200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3665, step 73300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3670, step 73400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3675, step 73500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3680, step 73600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3685, step 73700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3690, step 73800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3695, step 73900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3700, step 74000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3705, step 74100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3710, step 74200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3715, step 74300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3720, step 74400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3725, step 74500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3730, step 74600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3735, step 74700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3740, step 74800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3745, step 74900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3750, step 75000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3755, step 75100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3760, step 75200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3765, step 75300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3770, step 75400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3775, step 75500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3780, step 75600, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3785, step 75700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3790, step 75800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3795, step 75900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3800, step 76000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3805, step 76100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3810, step 76200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3815, step 76300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3820, step 76400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3825, step 76500, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 7.0\n", + "episode 3830, step 76600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3835, step 76700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3840, step 76800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3845, step 76900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3850, step 77000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3855, step 77100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3860, step 77200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3865, step 77300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
2.0\n", + "episode 3870, step 77400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3875, step 77500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3880, step 77600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3885, step 77700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 3890, step 77800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3895, step 77900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3900, step 78000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3905, step 78100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3910, step 78200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3915, step 78300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3920, step 78400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3925, step 78500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3930, step 78600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3935, step 78700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3940, step 78800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3945, step 78900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3950, step 79000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3955, step 79100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3960, step 79200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3965, step 79300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3970, step 79400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3975, step 79500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3980, step 79600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3985, step 79700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3990, step 79800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 7.0\n", + "episode 3995, step 79900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4000, step 80000, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4005, step 80100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4010, step 80200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4015, step 80300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4020, step 80400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4025, step 80500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4030, step 80600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4035, step 80700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4040, step 80800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4045, step 80900, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4050, step 81000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4055, step 81100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4060, step 81200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4065, step 81300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 4070, step 81400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4075, step 81500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4080, step 81600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4085, step 81700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
2.0\n", + "episode 4090, step 81800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4095, step 81900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 6.0\n", + "episode 4100, step 82000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4105, step 82100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4110, step 82200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4115, step 82300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4120, step 82400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4125, step 82500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4130, step 82600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4135, step 82700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4140, step 82800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4145, step 82900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4150, step 83000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4155, step 83100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4160, step 83200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4165, step 83300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4170, step 83400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4175, step 83500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4180, step 83600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4185, step 83700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4190, step 83800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4195, step 83900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4200, step 84000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4205, step 84100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4210, step 84200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4215, step 84300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4220, step 84400, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4225, step 84500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4230, step 84600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4235, step 84700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4240, step 84800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4245, step 84900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4250, step 85000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4255, step 85100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4260, step 85200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4265, step 85300, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4270, step 85400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4275, step 85500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4280, step 85600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4285, step 85700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4290, step 85800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4295, step 85900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4300, step 86000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4305, step 86100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
1.0\n", + "episode 4310, step 86200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4315, step 86300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4320, step 86400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4325, step 86500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4330, step 86600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4335, step 86700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4340, step 86800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 7.0\n", + "episode 4345, step 86900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4350, step 87000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4355, step 87100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4360, step 87200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4365, step 87300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4370, step 87400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 7.0\n", + "episode 4375, step 87500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4380, step 87600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4385, step 87700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4390, step 87800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4395, step 87900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4400, step 88000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4405, step 88100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4410, step 88200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4415, step 88300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4420, step 88400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4425, step 88500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4430, step 88600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4435, step 88700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4440, step 88800, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4445, step 88900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4450, step 89000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4455, step 89100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4460, step 89200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4465, step 89300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4470, step 89400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4475, step 89500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4480, step 89600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4485, step 89700, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4490, step 89800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4495, step 89900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4500, step 90000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4505, step 90100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4510, step 90200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4515, step 90300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4520, step 90400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4525, step 90500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
2.0\n", + "episode 4530, step 90600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4535, step 90700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4540, step 90800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4545, step 90900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4550, step 91000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4555, step 91100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4560, step 91200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4565, step 91300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4570, step 91400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4575, step 91500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4580, step 91600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4585, step 91700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4590, step 91800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4595, step 91900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4600, step 92000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4605, step 92100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4610, step 92200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4615, step 92300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4620, step 92400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4625, step 92500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4630, step 92600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4635, step 92700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4640, step 92800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4645, step 92900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4650, step 93000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4655, step 93100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4660, step 93200, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4665, step 93300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4670, step 93400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4675, step 93500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4680, step 93600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4685, step 93700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4690, step 93800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4695, step 93900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4700, step 94000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4705, step 94100, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4710, step 94200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4715, step 94300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4720, step 94400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4725, step 94500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4730, step 94600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4735, step 94700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4740, step 94800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4745, step 94900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
1.0\n", + "episode 4750, step 95000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4755, step 95100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4760, step 95200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4765, step 95300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4770, step 95400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4775, step 95500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4780, step 95600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4785, step 95700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4790, step 95800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4795, step 95900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4800, step 96000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4805, step 96100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4810, step 96200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4815, step 96300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4820, step 96400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4825, step 96500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4830, step 96600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4835, step 96700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4840, step 96800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4845, step 96900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4850, step 97000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4855, step 97100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4860, step 97200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4865, step 97300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4870, step 97400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4875, step 97500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4880, step 97600, agent=PearlAgent with DeepQLearning, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4885, step 97700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4890, step 97800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4895, step 97900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4900, step 98000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4905, step 98100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4910, step 98200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4915, step 98300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4920, step 98400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4925, step 98500, 
agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4930, step 98600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4935, step 98700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4940, step 98800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4945, step 98900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4950, step 99000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4955, step 99100, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4960, step 99200, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4965, step 99300, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 
4.0\n", + "episode 4970, step 99400, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4975, step 99500, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4980, step 99600, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4985, step 99700, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4990, step 99800, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4995, step 99900, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 1.0\n", + "episode 5000, step 100000, agent=PearlAgent with DeepQLearning, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), FIFOOffPolicyReplayBuffer, env=\n", + "return: 0.0\n" + ] }, { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Bootstrapped DQN Agent with LSTM History Summarization\n", - "\n", - "Leveraging the deep exploration value-based algorithm, now the agent can achieve a better performance in a much faster way while being able to still leverage history summarization capability. " + "data": { + "image/png": "", + "text/plain": [ + "
" ] - }, + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Add a LSTM history summarization module\n", + "\n", + "agent = PearlAgent(\n", + " policy_learner=DeepQLearning(\n", + " state_dim=128,\n", + " action_space=action_space,\n", + " hidden_dims=[64, 64],\n", + " training_rounds=50,\n", + " action_representation_module=action_representation_module,\n", + " ),\n", + " history_summarization_module=LSTMHistorySummarizationModule(\n", + " observation_dim=1,\n", + " action_dim=100,\n", + " hidden_dim=128,\n", + " history_length=8,\n", + " ),\n", + " replay_buffer=FIFOOffPolicyReplayBuffer(100_000),\n", + " device_id=-1,\n", + ")\n", + "\n", + "info = online_learning(\n", + " agent=agent,\n", + " env=env,\n", + " number_of_steps=number_of_steps,\n", + " print_every_x_steps=100,\n", + " record_period=record_period,\n", + " learn_after_episode=True,\n", + ")\n", + "torch.save(info[\"return\"], \"DQN-LSTM-return.pt\")\n", + "plt.plot(record_period * np.arange(len(info[\"return\"])), info[\"return\"], label=\"DQN-LSTM\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Bootstrapped DQN Agent with LSTM History Summarization\n", + "\n", + "Leveraging the deep exploration value-based algorithm, now the agent can achieve a better performance in a much faster way while being able to still leverage history summarization capability. Note how top average performance takes around 20,000 steps in the graph above, but only about 5,000 steps in the graph below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "_7Cpzoi3nVAw", + "vscode": { + "languageId": "python" + } + }, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_7Cpzoi3nVAw" - }, - "outputs": [], - "source": [ - "# Better exploration with BootstrappedDQN-LSTM\n", - "\n", - "agent = PearlAgent(\n", - " policy_learner=BootstrappedDQN(\n", - " q_ensemble_network=EnsembleQValueNetwork(\n", - " state_dim=128,\n", - " action_dim=100,\n", - " ensemble_size=10,\n", - " output_dim=1,\n", - " hidden_dims=[64, 64],\n", - " prior_scale=0.3,\n", - " ),\n", - " action_space=action_space,\n", - " training_rounds=50,\n", - " action_representation_module=action_representation_module,\n", - " ),\n", - " history_summarization_module=LSTMHistorySummarizationModule(\n", - " observation_dim=1,\n", - " action_dim=100,\n", - " hidden_dim=128,\n", - " history_length=8,\n", - " ),\n", - " replay_buffer=BootstrapReplayBuffer(100_000, 1.0, 10),\n", - " device_id=-1,\n", - ")\n", - "\n", - "info = online_learning(\n", - " agent=agent,\n", - " env=env,\n", - " number_of_steps=number_of_steps,\n", - " print_every_x_steps=100,\n", - " record_period=record_period,\n", - " learn_after_episode=True,\n", - ")\n", - "torch.save(info[\"return\"], \"BootstrappedDQN-LSTM-return.pt\")\n", - "plt.plot(record_period * np.arange(len(info[\"return\"])), info[\"return\"], label=\"BootstrappedDQN-LSTM\")\n", - "plt.legend()\n", - "plt.show()" - ] + "name": "stdout", + "output_type": "stream", + "text": [ + "episode 5, step 100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 10, step 200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 
0.0\n", + "episode 15, step 300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 20, step 400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 25, step 500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 30, step 600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 35, step 700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 40, step 800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 45, step 900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 50, step 1000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 55, step 1100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + 
"episode 60, step 1200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 65, step 1300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 70, step 1400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 75, step 1500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 80, step 1600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 85, step 1700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 90, step 1800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 95, step 1900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 100, step 2000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 
105, step 2100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 110, step 2200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 115, step 2300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 120, step 2400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 125, step 2500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 130, step 2600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 135, step 2700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 140, step 2800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 145, step 2900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 
150, step 3000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 155, step 3100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 7.0\n", + "episode 160, step 3200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 165, step 3300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 170, step 3400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 175, step 3500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 180, step 3600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 185, step 3700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 190, step 3800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 
195, step 3900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 200, step 4000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 205, step 4100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 210, step 4200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 215, step 4300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 220, step 4400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 225, step 4500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 230, step 4600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 235, step 4700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 
240, step 4800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 245, step 4900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 250, step 5000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 255, step 5100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 260, step 5200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 265, step 5300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 270, step 5400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 275, step 5500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 280, step 5600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 
285, step 5700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 290, step 5800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 295, step 5900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 300, step 6000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 305, step 6100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 310, step 6200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 315, step 6300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 320, step 6400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 325, step 6500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 
330, step 6600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 335, step 6700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 340, step 6800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 345, step 6900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 350, step 7000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 355, step 7100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 360, step 7200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 365, step 7300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 370, step 7400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 
375, step 7500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 380, step 7600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 385, step 7700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 390, step 7800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 395, step 7900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 400, step 8000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 405, step 8100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 410, step 8200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 415, step 8300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 
420, step 8400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 425, step 8500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 430, step 8600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 435, step 8700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 440, step 8800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 445, step 8900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 450, step 9000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 455, step 9100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 460, step 9200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 
465, step 9300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 470, step 9400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 475, step 9500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 480, step 9600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 485, step 9700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 490, step 9800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 495, step 9900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 500, step 10000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 505, step 10100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 
510, step 10200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 515, step 10300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 520, step 10400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 525, step 10500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 530, step 10600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 535, step 10700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 540, step 10800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 545, step 10900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 550, step 11000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + 
"episode 555, step 11100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 560, step 11200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 565, step 11300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 570, step 11400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 575, step 11500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 580, step 11600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 585, step 11700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 590, step 11800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 595, step 11900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 
1.0\n", + "episode 600, step 12000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 605, step 12100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 610, step 12200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 615, step 12300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 620, step 12400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 625, step 12500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 630, step 12600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 635, step 12700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 640, step 12800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + 
"return: 3.0\n", + "episode 645, step 12900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 650, step 13000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 655, step 13100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 660, step 13200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 665, step 13300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 670, step 13400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 675, step 13500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 680, step 13600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 685, step 13700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, 
env=\n", + "return: 4.0\n", + "episode 690, step 13800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 695, step 13900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 700, step 14000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 705, step 14100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 710, step 14200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 715, step 14300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 720, step 14400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 725, step 14500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 730, step 14600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 735, step 14700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 740, step 14800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 745, step 14900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 750, step 15000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 755, step 15100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 760, step 15200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 765, step 15300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 770, step 15400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 775, step 15500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, 
batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 780, step 15600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 785, step 15700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 790, step 15800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 795, step 15900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 800, step 16000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 805, step 16100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 8.0\n", + "episode 810, step 16200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 815, step 16300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 820, step 16400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, 
num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 825, step 16500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 830, step 16600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 835, step 16700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 840, step 16800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 845, step 16900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 850, step 17000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 855, step 17100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 860, step 17200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 865, step 17300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): 
LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 870, step 17400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 875, step 17500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 880, step 17600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 885, step 17700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 890, step 17800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 895, step 17900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 900, step 18000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 905, step 18100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 910, step 18200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " 
(lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 915, step 18300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 920, step 18400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 925, step 18500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 930, step 18600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 935, step 18700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 940, step 18800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 945, step 18900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 950, step 19000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 955, step 19100, agent=PearlAgent with BootstrappedDQN, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 960, step 19200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 965, step 19300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 970, step 19400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 975, step 19500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 980, step 19600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 985, step 19700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 990, step 19800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 995, step 19900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1000, step 20000, agent=PearlAgent with 
BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1005, step 20100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1010, step 20200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1015, step 20300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1020, step 20400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1025, step 20500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1030, step 20600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1035, step 20700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1040, step 20800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1045, step 20900, 
agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1050, step 21000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1055, step 21100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1060, step 21200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1065, step 21300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1070, step 21400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1075, step 21500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1080, step 21600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1085, step 21700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 
1090, step 21800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1095, step 21900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1100, step 22000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1105, step 22100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1110, step 22200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1115, step 22300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1120, step 22400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1125, step 22500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 6.0\n", + "episode 1130, step 22600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 
5.0\n", + "episode 1135, step 22700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1140, step 22800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1145, step 22900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1150, step 23000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1155, step 23100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1160, step 23200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1165, step 23300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1170, step 23400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1175, step 23500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, 
env=\n", + "return: 3.0\n", + "episode 1180, step 23600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1185, step 23700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1190, step 23800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1195, step 23900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1200, step 24000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1205, step 24100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1210, step 24200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1215, step 24300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1220, step 24400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1225, step 24500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1230, step 24600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1235, step 24700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1240, step 24800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1245, step 24900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1250, step 25000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1255, step 25100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1260, step 25200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1265, step 25300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, 
batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1270, step 25400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1275, step 25500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1280, step 25600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1285, step 25700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1290, step 25800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1295, step 25900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1300, step 26000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1305, step 26100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1310, step 26200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 
128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1315, step 26300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1320, step 26400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1325, step 26500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1330, step 26600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1335, step 26700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1340, step 26800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1345, step 26900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1350, step 27000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1355, step 27100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " 
(lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1360, step 27200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1365, step 27300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1370, step 27400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1375, step 27500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1380, step 27600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1385, step 27700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1390, step 27800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1395, step 27900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1400, step 28000, agent=PearlAgent with BootstrappedDQN, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1405, step 28100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1410, step 28200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1415, step 28300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1420, step 28400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1425, step 28500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1430, step 28600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1435, step 28700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1440, step 28800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1445, step 28900, agent=PearlAgent with 
BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1450, step 29000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1455, step 29100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1460, step 29200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1465, step 29300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1470, step 29400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1475, step 29500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1480, step 29600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1485, step 29700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1490, step 29800, 
agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1495, step 29900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1500, step 30000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1505, step 30100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1510, step 30200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1515, step 30300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1520, step 30400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1525, step 30500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1530, step 30600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 
1535, step 30700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1540, step 30800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1545, step 30900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1550, step 31000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1555, step 31100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1560, step 31200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1565, step 31300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1570, step 31400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1575, step 31500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 
3.0\n", + "episode 1580, step 31600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 1585, step 31700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1590, step 31800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1595, step 31900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1600, step 32000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1605, step 32100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1610, step 32200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1615, step 32300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1620, step 32400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, 
env=\n", + "return: 3.0\n", + "episode 1625, step 32500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1630, step 32600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1635, step 32700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1640, step 32800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1645, step 32900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1650, step 33000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1655, step 33100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1660, step 33200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1665, step 33300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1670, step 33400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1675, step 33500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1680, step 33600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1685, step 33700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1690, step 33800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1695, step 33900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1700, step 34000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1705, step 34100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 7.0\n", + "episode 1710, step 34200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, 
batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1715, step 34300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1720, step 34400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1725, step 34500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1730, step 34600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1735, step 34700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1740, step 34800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1745, step 34900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1750, step 35000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1755, step 35100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 
128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1760, step 35200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1765, step 35300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1770, step 35400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1775, step 35500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1780, step 35600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1785, step 35700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1790, step 35800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1795, step 35900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1800, step 36000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " 
(lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1805, step 36100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1810, step 36200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1815, step 36300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1820, step 36400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1825, step 36500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1830, step 36600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1835, step 36700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1840, step 36800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1845, step 36900, agent=PearlAgent with BootstrappedDQN, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1850, step 37000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1855, step 37100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1860, step 37200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 1865, step 37300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1870, step 37400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1875, step 37500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1880, step 37600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1885, step 37700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1890, step 37800, agent=PearlAgent with 
BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1895, step 37900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1900, step 38000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1905, step 38100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1910, step 38200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1915, step 38300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1920, step 38400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1925, step 38500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1930, step 38600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1935, step 38700, 
agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1940, step 38800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1945, step 38900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1950, step 39000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1955, step 39100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1960, step 39200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1965, step 39300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 1970, step 39400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1975, step 39500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 
1980, step 39600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 1985, step 39700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 1990, step 39800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 1995, step 39900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2000, step 40000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2005, step 40100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2010, step 40200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2015, step 40300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2020, step 40400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 
5.0\n", + "episode 2025, step 40500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2030, step 40600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2035, step 40700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2040, step 40800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2045, step 40900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2050, step 41000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2055, step 41100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2060, step 41200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2065, step 41300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, 
env=\n", + "return: 3.0\n", + "episode 2070, step 41400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2075, step 41500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2080, step 41600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2085, step 41700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2090, step 41800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2095, step 41900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2100, step 42000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2105, step 42100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2110, step 42200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2115, step 42300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2120, step 42400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2125, step 42500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2130, step 42600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2135, step 42700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2140, step 42800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2145, step 42900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2150, step 43000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2155, step 43100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, 
batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2160, step 43200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2165, step 43300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2170, step 43400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2175, step 43500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2180, step 43600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2185, step 43700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2190, step 43800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2195, step 43900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2200, step 44000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 
128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2205, step 44100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2210, step 44200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2215, step 44300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2220, step 44400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2225, step 44500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2230, step 44600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2235, step 44700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2240, step 44800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2245, step 44900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " 
(lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2250, step 45000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2255, step 45100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2260, step 45200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2265, step 45300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2270, step 45400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2275, step 45500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2280, step 45600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2285, step 45700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2290, step 45800, agent=PearlAgent with BootstrappedDQN, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2295, step 45900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2300, step 46000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2305, step 46100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2310, step 46200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2315, step 46300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2320, step 46400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2325, step 46500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2330, step 46600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2335, step 46700, agent=PearlAgent with 
BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2340, step 46800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2345, step 46900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2350, step 47000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2355, step 47100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2360, step 47200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2365, step 47300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2370, step 47400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2375, step 47500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2380, step 47600, 
agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2385, step 47700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2390, step 47800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2395, step 47900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2400, step 48000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2405, step 48100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2410, step 48200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2415, step 48300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2420, step 48400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 
2425, step 48500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2430, step 48600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2435, step 48700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2440, step 48800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2445, step 48900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2450, step 49000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2455, step 49100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2460, step 49200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2465, step 49300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 
3.0\n", + "episode 2470, step 49400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2475, step 49500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2480, step 49600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2485, step 49700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2490, step 49800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2495, step 49900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2500, step 50000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2505, step 50100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2510, step 50200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, 
env=\n", + "return: 3.0\n", + "episode 2515, step 50300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2520, step 50400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2525, step 50500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2530, step 50600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2535, step 50700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2540, step 50800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2545, step 50900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2550, step 51000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2555, step 51100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2560, step 51200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2565, step 51300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2570, step 51400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 6.0\n", + "episode 2575, step 51500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2580, step 51600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2585, step 51700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2590, step 51800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2595, step 51900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2600, step 52000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, 
batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2605, step 52100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2610, step 52200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2615, step 52300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2620, step 52400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2625, step 52500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2630, step 52600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2635, step 52700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2640, step 52800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2645, step 52900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 
128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2650, step 53000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2655, step 53100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2660, step 53200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2665, step 53300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2670, step 53400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2675, step 53500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2680, step 53600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2685, step 53700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2690, step 53800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " 
(lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2695, step 53900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2700, step 54000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2705, step 54100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2710, step 54200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2715, step 54300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2720, step 54400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2725, step 54500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2730, step 54600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2735, step 54700, agent=PearlAgent with BootstrappedDQN, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2740, step 54800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2745, step 54900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2750, step 55000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2755, step 55100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2760, step 55200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2765, step 55300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2770, step 55400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2775, step 55500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2780, step 55600, agent=PearlAgent with 
BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 6.0\n", + "episode 2785, step 55700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2790, step 55800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2795, step 55900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2800, step 56000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2805, step 56100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2810, step 56200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2815, step 56300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2820, step 56400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2825, step 56500, 
agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2830, step 56600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2835, step 56700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2840, step 56800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2845, step 56900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2850, step 57000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2855, step 57100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2860, step 57200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2865, step 57300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 
2870, step 57400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2875, step 57500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2880, step 57600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2885, step 57700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2890, step 57800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2895, step 57900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2900, step 58000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2905, step 58100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2910, step 58200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 
4.0\n", + "episode 2915, step 58300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 2920, step 58400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2925, step 58500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2930, step 58600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 2935, step 58700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2940, step 58800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2945, step 58900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2950, step 59000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 2955, step 59100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, 
env=\n", + "return: 5.0\n", + "episode 2960, step 59200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2965, step 59300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2970, step 59400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2975, step 59500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 2980, step 59600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 2985, step 59700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 6.0\n", + "episode 2990, step 59800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 2995, step 59900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3000, step 60000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3005, step 60100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3010, step 60200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3015, step 60300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3020, step 60400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3025, step 60500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3030, step 60600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3035, step 60700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3040, step 60800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3045, step 60900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, 
batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3050, step 61000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3055, step 61100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3060, step 61200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3065, step 61300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3070, step 61400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3075, step 61500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3080, step 61600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3085, step 61700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3090, step 61800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 
128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3095, step 61900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3100, step 62000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3105, step 62100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3110, step 62200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3115, step 62300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3120, step 62400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3125, step 62500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3130, step 62600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3135, step 62700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " 
(lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3140, step 62800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3145, step 62900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3150, step 63000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3155, step 63100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3160, step 63200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3165, step 63300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3170, step 63400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3175, step 63500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3180, step 63600, agent=PearlAgent with BootstrappedDQN, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3185, step 63700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3190, step 63800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3195, step 63900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3200, step 64000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3205, step 64100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3210, step 64200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3215, step 64300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3220, step 64400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3225, step 64500, agent=PearlAgent with 
BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3230, step 64600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3235, step 64700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3240, step 64800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3245, step 64900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3250, step 65000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3255, step 65100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3260, step 65200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3265, step 65300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3270, step 65400, 
agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3275, step 65500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3280, step 65600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3285, step 65700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3290, step 65800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3295, step 65900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3300, step 66000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3305, step 66100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3310, step 66200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 
3315, step 66300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3320, step 66400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3325, step 66500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3330, step 66600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3335, step 66700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3340, step 66800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3345, step 66900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3350, step 67000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3355, step 67100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 
2.0\n", + "episode 3360, step 67200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3365, step 67300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3370, step 67400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3375, step 67500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3380, step 67600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3385, step 67700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3390, step 67800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 6.0\n", + "episode 3395, step 67900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3400, step 68000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, 
env=\n", + "return: 2.0\n", + "episode 3405, step 68100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3410, step 68200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3415, step 68300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3420, step 68400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3425, step 68500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3430, step 68600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3435, step 68700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3440, step 68800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3445, step 68900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3450, step 69000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3455, step 69100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3460, step 69200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3465, step 69300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3470, step 69400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3475, step 69500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3480, step 69600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3485, step 69700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3490, step 69800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, 
batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3495, step 69900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3500, step 70000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3505, step 70100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3510, step 70200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3515, step 70300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3520, step 70400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3525, step 70500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3530, step 70600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3535, step 70700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 
128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3540, step 70800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3545, step 70900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3550, step 71000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3555, step 71100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3560, step 71200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3565, step 71300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 7.0\n", + "episode 3570, step 71400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3575, step 71500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3580, step 71600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " 
(lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3585, step 71700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3590, step 71800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3595, step 71900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3600, step 72000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3605, step 72100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3610, step 72200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3615, step 72300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3620, step 72400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3625, step 72500, agent=PearlAgent with BootstrappedDQN, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3630, step 72600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3635, step 72700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3640, step 72800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3645, step 72900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3650, step 73000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3655, step 73100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3660, step 73200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3665, step 73300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3670, step 73400, agent=PearlAgent with 
BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3675, step 73500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3680, step 73600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3685, step 73700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3690, step 73800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3695, step 73900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3700, step 74000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 7.0\n", + "episode 3705, step 74100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3710, step 74200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3715, step 74300, 
agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3720, step 74400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3725, step 74500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3730, step 74600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3735, step 74700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3740, step 74800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3745, step 74900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3750, step 75000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3755, step 75100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 
3760, step 75200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3765, step 75300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3770, step 75400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3775, step 75500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3780, step 75600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3785, step 75700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3790, step 75800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3795, step 75900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3800, step 76000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 
4.0\n", + "episode 3805, step 76100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3810, step 76200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3815, step 76300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3820, step 76400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3825, step 76500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3830, step 76600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3835, step 76700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 6.0\n", + "episode 3840, step 76800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3845, step 76900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, 
env=\n", + "return: 1.0\n", + "episode 3850, step 77000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3855, step 77100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3860, step 77200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3865, step 77300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3870, step 77400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3875, step 77500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3880, step 77600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3885, step 77700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3890, step 77800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3895, step 77900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3900, step 78000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3905, step 78100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3910, step 78200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3915, step 78300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3920, step 78400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3925, step 78500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3930, step 78600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3935, step 78700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, 
batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3940, step 78800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3945, step 78900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 3950, step 79000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3955, step 79100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3960, step 79200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3965, step 79300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3970, step 79400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 3975, step 79500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 3980, step 79600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 
128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 3985, step 79700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 3990, step 79800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 3995, step 79900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4000, step 80000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4005, step 80100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4010, step 80200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4015, step 80300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4020, step 80400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4025, step 80500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " 
(lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4030, step 80600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4035, step 80700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4040, step 80800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4045, step 80900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4050, step 81000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4055, step 81100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4060, step 81200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4065, step 81300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4070, step 81400, agent=PearlAgent with BootstrappedDQN, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4075, step 81500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4080, step 81600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4085, step 81700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4090, step 81800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4095, step 81900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4100, step 82000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4105, step 82100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4110, step 82200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4115, step 82300, agent=PearlAgent with 
BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4120, step 82400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4125, step 82500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4130, step 82600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4135, step 82700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4140, step 82800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4145, step 82900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4150, step 83000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4155, step 83100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4160, step 83200, 
agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4165, step 83300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4170, step 83400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4175, step 83500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4180, step 83600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4185, step 83700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 6.0\n", + "episode 4190, step 83800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4195, step 83900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4200, step 84000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 
4205, step 84100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4210, step 84200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4215, step 84300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4220, step 84400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4225, step 84500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4230, step 84600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4235, step 84700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4240, step 84800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4245, step 84900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 
2.0\n", + "episode 4250, step 85000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4255, step 85100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4260, step 85200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4265, step 85300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4270, step 85400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4275, step 85500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4280, step 85600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 6.0\n", + "episode 4285, step 85700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4290, step 85800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, 
env=\n", + "return: 2.0\n", + "episode 4295, step 85900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4300, step 86000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4305, step 86100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4310, step 86200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4315, step 86300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4320, step 86400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4325, step 86500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4330, step 86600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4335, step 86700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4340, step 86800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4345, step 86900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4350, step 87000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4355, step 87100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4360, step 87200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4365, step 87300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4370, step 87400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4375, step 87500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4380, step 87600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, 
batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4385, step 87700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4390, step 87800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4395, step 87900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4400, step 88000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4405, step 88100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4410, step 88200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4415, step 88300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4420, step 88400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4425, step 88500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 
128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4430, step 88600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4435, step 88700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4440, step 88800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4445, step 88900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4450, step 89000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4455, step 89100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4460, step 89200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4465, step 89300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4470, step 89400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " 
(lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4475, step 89500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4480, step 89600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4485, step 89700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4490, step 89800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4495, step 89900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4500, step 90000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4505, step 90100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4510, step 90200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4515, step 90300, agent=PearlAgent with BootstrappedDQN, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4520, step 90400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4525, step 90500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4530, step 90600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4535, step 90700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4540, step 90800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4545, step 90900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 6.0\n", + "episode 4550, step 91000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4555, step 91100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4560, step 91200, agent=PearlAgent with 
BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4565, step 91300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4570, step 91400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4575, step 91500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4580, step 91600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4585, step 91700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4590, step 91800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4595, step 91900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4600, step 92000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4605, step 92100, 
agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4610, step 92200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4615, step 92300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4620, step 92400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4625, step 92500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4630, step 92600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4635, step 92700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4640, step 92800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4645, step 92900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 
4650, step 93000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4655, step 93100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4660, step 93200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4665, step 93300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4670, step 93400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4675, step 93500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4680, step 93600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4685, step 93700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4690, step 93800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 
3.0\n", + "episode 4695, step 93900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4700, step 94000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4705, step 94100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4710, step 94200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4715, step 94300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4720, step 94400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4725, step 94500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4730, step 94600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4735, step 94700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, 
env=\n", + "return: 4.0\n", + "episode 4740, step 94800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4745, step 94900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4750, step 95000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4755, step 95100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4760, step 95200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4765, step 95300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4770, step 95400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4775, step 95500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4780, step 95600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), 
BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4785, step 95700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4790, step 95800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4795, step 95900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4800, step 96000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4805, step 96100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4810, step 96200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4815, step 96300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4820, step 96400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4825, step 96500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, 
batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4830, step 96600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4835, step 96700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4840, step 96800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4845, step 96900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4850, step 97000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4855, step 97100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4860, step 97200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4865, step 97300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 7.0\n", + "episode 4870, step 97400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 
128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4875, step 97500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4880, step 97600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4885, step 97700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4890, step 97800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4895, step 97900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4900, step 98000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4905, step 98100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4910, step 98200, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4915, step 98300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " 
(lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4920, step 98400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4925, step 98500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4930, step 98600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4935, step 98700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4940, step 98800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4945, step 98900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4950, step 99000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 0.0\n", + "episode 4955, step 99100, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4960, step 99200, agent=PearlAgent with BootstrappedDQN, 
LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 4.0\n", + "episode 4965, step 99300, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4970, step 99400, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4975, step 99500, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 4980, step 99600, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 5.0\n", + "episode 4985, step 99700, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 1.0\n", + "episode 4990, step 99800, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 2.0\n", + "episode 4995, step 99900, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 3.0\n", + "episode 5000, step 100000, agent=PearlAgent with BootstrappedDQN, LSTMHistorySummarizationModule(\n", + " (lstm): LSTM(101, 128, num_layers=2, batch_first=True)\n", + "), BootstrapReplayBuffer, env=\n", + "return: 6.0\n" + ] }, { - "cell_type": "markdown", - "metadata": 
{}, - "source": [ - "## Summary\n", - "In this example, we illustrated Pearl's capability of dealing with dynamic action space, standard policy learning, history summarization and intelligent exploration, all in a single agent. By running the code above, you should be able to get agent performance results similar to the figure shown in pearl/tutorials/single_item_recommender_system_example/dqn+lstm+deep_explore.png.\n" + "data": { + "image/png": "", + "text/plain": [ + "
" ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] + }, + "metadata": {}, + "output_type": "display_data" } - ], - "metadata": { + ], + "source": [ + "# Better exploration with BootstrappedDQN-LSTM\n", + "\n", + "agent = PearlAgent(\n", + " policy_learner=BootstrappedDQN(\n", + " q_ensemble_network=EnsembleQValueNetwork(\n", + " state_dim=128,\n", + " action_dim=100,\n", + " ensemble_size=10,\n", + " output_dim=1,\n", + " hidden_dims=[64, 64],\n", + " prior_scale=0.3,\n", + " ),\n", + " action_space=action_space,\n", + " training_rounds=50,\n", + " action_representation_module=action_representation_module,\n", + " ),\n", + " history_summarization_module=LSTMHistorySummarizationModule(\n", + " observation_dim=1,\n", + " action_dim=100,\n", + " hidden_dim=128,\n", + " history_length=8,\n", + " ),\n", + " replay_buffer=BootstrapReplayBuffer(100_000, 1.0, 10),\n", + " device_id=-1,\n", + ")\n", + "\n", + "info = online_learning(\n", + " agent=agent,\n", + " env=env,\n", + " number_of_steps=number_of_steps,\n", + " print_every_x_steps=100,\n", + " record_period=record_period,\n", + " learn_after_episode=True,\n", + ")\n", + "torch.save(info[\"return\"], \"BootstrappedDQN-LSTM-return.pt\")\n", + "plt.plot(record_period * np.arange(len(info[\"return\"])), info[\"return\"], label=\"BootstrappedDQN-LSTM\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary\n", + "In this example, we illustrated Pearl's capability of dealing with dynamic action space, standard policy learning, history summarization and intelligent exploration, all in a single agent.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "custom": { + "cells": [], + "metadata": { "custom": { - "cells": [], - "metadata": { - "custom": { - "cells": [], - "metadata": { - "custom": { - "cells": [], - "metadata": { - "custom": { - "cells": [], - 
"metadata": { - "custom": { - "cells": [], - "metadata": { - "accelerator": "GPU", - "colab": { - "gpuType": "T4", - "include_colab_link": true, - "provenance": [] - }, - "fileHeader": "", - "fileUid": "4316417e-7688-45f2-a94f-24148bfc425e", - "isAdHoc": false, - "kernelspec": { - "display_name": "pearl (local)", - "language": "python", - "name": "pearl_local" - }, - "language_info": { - "name": "python" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - }, - "fileHeader": "", - "fileUid": "1158a851-91bb-437e-a391-aba92448f600", - "indentAmount": 2, - "isAdHoc": false, - "language_info": { - "name": "plaintext" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - }, - "fileHeader": "", - "fileUid": "ddf9fa29-09d7-404d-bc1b-62a580952524", - "indentAmount": 2, - "isAdHoc": false, - "language_info": { - "name": "plaintext" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - }, - "fileHeader": "", - "fileUid": "6fab3b53-8fb0-436a-8149-f5d5acbe58a5", - "indentAmount": 2, - "isAdHoc": false, - "language_info": { - "name": "plaintext" - } - }, - "nbformat": 4, - "nbformat_minor": 2 + "cells": [], + "metadata": { + "custom": { + "cells": [], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "include_colab_link": true, + "provenance": [] }, "fileHeader": "", - "fileUid": "794b393f-dce9-4c73-9ff8-eeff43ca257b", - "indentAmount": 2, + "fileUid": "4316417e-7688-45f2-a94f-24148bfc425e", "isAdHoc": false, + "kernelspec": { + "display_name": "pearl (local)", + "language": "python", + "name": "pearl_local" + }, "language_info": { - "name": "plaintext" + "name": "python" } + }, + "nbformat": 4, + "nbformat_minor": 2 }, - "nbformat": 4, - "nbformat_minor": 2 + "fileHeader": "", + "fileUid": "1158a851-91bb-437e-a391-aba92448f600", + "indentAmount": 2, + "isAdHoc": false, + "language_info": { + "name": "plaintext" + } + }, + "nbformat": 4, + "nbformat_minor": 2 }, - "indentAmount": 2 + "fileHeader": "", + "fileUid": "ddf9fa29-09d7-404d-bc1b-62a580952524", + 
"indentAmount": 2, + "isAdHoc": false, + "language_info": { + "name": "plaintext" + } + }, + "nbformat": 4, + "nbformat_minor": 2 }, - "nbformat": 4, - "nbformat_minor": 2 + "fileHeader": "", + "fileUid": "6fab3b53-8fb0-436a-8149-f5d5acbe58a5", + "indentAmount": 2, + "isAdHoc": false, + "language_info": { + "name": "plaintext" + } + }, + "nbformat": 4, + "nbformat_minor": 2 }