Update RL models for 3.0 #236

Merged
merged 3 commits on Nov 14, 2024
4 changes: 4 additions & 0 deletions .gitignore
@@ -23,6 +23,9 @@ var/
.installed.cfg
*.egg

# ignore RL file - users download model on own
rl

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
@@ -58,6 +61,7 @@ target/

# Jupyter and iPython notebook checkpoints
*.ipynb_checkpoints
*.virtual_documents

# Spyder app workspace config file
.spyderworkspace
9 changes: 6 additions & 3 deletions rl/README.md
@@ -14,25 +14,28 @@ This repository demonstrates various applications of reinforcement learning (RL)
1. **Install Mesa Models**
Begin by installing the Mesa models:

#TODO: Update this -- do release?

```bash
pip install -U -e git+https://github.com/projectmesa/[email protected]#egg=mesa-models
```

2. **Install RLlib for Multi-Agent Training**
3. **Install RLlib for Multi-Agent Training**
Next, install RLlib along with TensorFlow and PyTorch to support multi-agent training algorithms:

```bash
pip install "ray[rllib]" tensorflow torch
```
#TODO Update requirements to mesa[rec] >3.0

3. **Install Additional Dependencies**
4. **Install Additional Dependencies**
Finally, install any remaining dependencies:

```bash
pip install -r requirements.txt
```

4. **Download Pre-Trained Weights**
5. **Download Pre-Trained Weights**
Download pre-trained weights from hugging face:

```bash
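The download command itself is collapsed out of this diff view. Purely as a hedged illustration (the repository id and target directory below are placeholders, not the project's real values), weights hosted on Hugging Face are commonly fetched with the `huggingface_hub` package:

```python
# Hypothetical sketch only: repo_id and local_dir are placeholders, not the
# project's actual Hugging Face repository or directory layout.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="projectmesa/rl-pretrained-weights",  # placeholder repo id
    local_dir="rl/model",                         # placeholder target directory
)
```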
85 changes: 58 additions & 27 deletions rl/Tutorials.ipynb
@@ -6,24 +6,50 @@
"source": [
"# Tutorial: Reinforcement Learning with Mesa Environments\n",
"\n",
"# Welcome to this comprehensive guide on integrating reinforcement learning (RL) with Mesa environments. \n",
"# Mesa, an agent-based modeling framework, offers an excellent platform to experiment with RL algorithms. \n",
"# In this tutorial, we'll explore several examples of how RL can be applied to various Mesa environments, \n",
"# starting with the **Epstein Civil Violence model**.\n",
"\n",
"# ## Getting Started\n",
"Welcome to this comprehensive guide on integrating reinforcement learning (RL) with Mesa environments. \n",
"Mesa, an agent-based modeling framework, offers an excellent platform to experiment with RL algorithms. \n",
"In this tutorial, we'll explore several examples of how RL can be applied to various Mesa environments, \n",
"starting with the **Epstein Civil Violence model**."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Getting Started\n",
"\n",
"# Before diving into the implementation, take a moment to familiarize yourself with the Epstein Civil Violence model.\n",
"# This will give you a solid understanding of the environment we’ll be working with.\n",
"Before diving into the implementation, take a moment to familiarize yourself with the Epstein Civil Violence model.\n",
"This will give you a solid understanding of the environment we’ll be working with.\n",
"\n",
"# Next, ensure all dependencies are installed by following the instructions in the `README.md`.\n"
"Next, ensure all dependencies are installed by following the instructions in the `README.md`."
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From C:\\Users\\thoma\\miniconda3\\envs\\mesa_dev\\Lib\\site-packages\\ray\\rllib\\utils\\framework.py:130: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.\n",
"\n"
]
},
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'mesa_models.epstein_civil_violence'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[1], line 5\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# ### Step 1: Importing the Necessary Modules\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# To begin, let’s import the required modules for the Epstein Civil Violence model:\u001b[39;00m\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mepstein_civil_violence\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodel\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m EpsteinCivilViolenceRL\n\u001b[1;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mepstein_civil_violence\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mserver\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m run_model\n\u001b[0;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mepstein_civil_violence\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtrain_config\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m config\n\u001b[0;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtrain\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m train_model\n",
"File \u001b[1;32m~\\Documents\\GitHub\\dev\\mesa-examples\\rl\\epstein_civil_violence\\server.py:4\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mray\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmesa_models\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mepstein_civil_violence\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mportrayal\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m citizen_cop_portrayal\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mray\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tune\n\u001b[0;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mray\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrllib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01malgorithms\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01malgorithm\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Algorithm\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'mesa_models.epstein_civil_violence'"
]
}
],
"source": [
"# ### Step 1: Importing the Necessary Modules\n",
"# To begin, let’s import the required modules for the Epstein Civil Violence model:\n",
@@ -122,37 +148,42 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# ### Alternative Approach: Using Stable-Baselines with Mesa\n",
"# Alternative Approach: Using Stable-Baselines with Mesa\n",
"\n",
"# In the example above, we utilized RLlib to integrate reinforcement learning algorithms with the Mesa environment, \n",
"# which is particularly useful when you want different policies for different agents. \n",
"# However, if your use case requires a simpler setup where all agents follow the same policy, \n",
"# you can opt for Stable-Baselines. An example of integrating Stable-Baselines with Mesa can be found in the Boltzmann Money model.\n",
"In the example above, we utilized RLlib to integrate reinforcement learning algorithms with the Mesa environment, which is particularly useful when you want different policies for different agents. \n",
"However, if your use case requires a simpler setup where all agents follow the same policy, you can opt for Stable-Baselines. An example of integrating Stable-Baselines with Mesa can be found in the Boltzmann Money model.\n",
"\n",
"# You can explore more on how to use Stable-Baselines with Mesa by following the respective documentation.\n"
"You can explore more on how to use Stable-Baselines with Mesa by following the respective documentation.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# ### Implementing Your Own Cases\n",
"# Implementing Your Own RL Models\n",
"\n",
"# If you're ready to explore RL in different agent-based scenarios, you can start by experimenting with various examples we provide at Mesa-Examples:\n",
"# Link: https://github.com/projectmesa/mesa-examples\n",
"If you're ready to explore RL in different agent-based scenarios, you can start by experimenting with various examples we provide at Mesa-Examples:\n",
"Link: https://github.com/projectmesa/mesa-examples\n",
"\n",
"# These examples cover a range of scenarios and offer a great starting point for understanding how to apply RL within Mesa environments.\n",
"These examples cover a range of scenarios and offer a great starting point for understanding how to apply RL within Mesa environments.\n",
"\n",
"# If you have your own scenario in mind, you can create it as a Mesa model by following this series of Tutorials:\n",
"# Link: https://mesa.readthedocs.io/en/stable/tutorials/intro_tutorial.html\n",
"If you have your own scenario in mind, you can create it as a Mesa model by following this series of Tutorials:\n",
"Link: https://mesa.readthedocs.io/en/stable/tutorials/intro_tutorial.html\n",
"\n",
"# Once your scenario is set up as a Mesa model, you can refer to the code in the provided implementations to see how the RL components are built on top of the respective Mesa models.\n"
"Once your scenario is set up as a Mesa model, you can refer to the code in the provided implementations to see how the RL components are built on top of the respective Mesa models.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "test",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -166,9 +197,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
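Read end to end, the notebook's flow is: import the RL wrapper for the Epstein Civil Violence model, train it with the provided RLlib config, then visualize a trained run. A minimal sketch of that flow follows; the exact signatures of `train_model` and `run_model` are not visible in this diff, so the calls below are assumptions. The committed `ModuleNotFoundError` output also shows that `server.py` still imports `mesa_models.epstein_civil_violence`, which is absent from the environment the notebook was run in, so the visualization step is left commented out.

```python
# Sketch of the notebook's intended workflow; call signatures are assumptions,
# not the notebook's verbatim code.
from epstein_civil_violence.train_config import config
from train import train_model

train_model(config)  # assumed entry point for RLlib training

# Visualization, pending the mesa_models import fix noted in the traceback above:
# from epstein_civil_violence.server import run_model
# run_model()
```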
44 changes: 20 additions & 24 deletions rl/boltzmann_money/model.py
@@ -19,19 +19,16 @@
# Import necessary libraries
import numpy as np
import seaborn as sns
from mesa_models.boltzmann_wealth_model.model import (
BoltzmannWealthModel,
MoneyAgent,
compute_gini,
)
from mesa.examples.basic.boltzmann_wealth_model.agents import MoneyAgent
from mesa.examples.basic.boltzmann_wealth_model.model import BoltzmannWealth

NUM_AGENTS = 10


# Define the agent class
class MoneyAgentRL(MoneyAgent):
def __init__(self, unique_id, model):
super().__init__(unique_id, model)
def __init__(self, model):
super().__init__(model)
self.wealth = np.random.randint(1, NUM_AGENTS)

def move(self, action):
@@ -74,45 +71,46 @@ def take_money(self):

def step(self):
# Get the action for the agent
action = self.model.action_dict[self.unique_id]
# TODO: figure out why agents are being made twice
action = self.model.action_dict[self.unique_id - 11]
# Move the agent based on the action
self.move(action)
# Take money from other agents in the same cell
self.take_money()


# Define the model class
class BoltzmannWealthModelRL(BoltzmannWealthModel, gymnasium.Env):
def __init__(self, N, width, height):
super().__init__(N, width, height)
class BoltzmannWealthModelRL(BoltzmannWealth, gymnasium.Env):
def __init__(self, n, width, height):
super().__init__(n, width, height)
# Define the observation and action space for the RL model
# The observation space is the wealth of each agent and their position
self.observation_space = gymnasium.spaces.Box(low=0, high=10 * N, shape=(N, 3))
self.observation_space = gymnasium.spaces.Box(low=0, high=10 * n, shape=(n, 3))
# The action space is a MultiDiscrete space with 5 possible actions for each agent
self.action_space = gymnasium.spaces.MultiDiscrete([5] * N)
self.action_space = gymnasium.spaces.MultiDiscrete([5] * n)
self.is_visualize = False

def step(self, action):
self.action_dict = action
# Perform one step of the model
self.schedule.step()
self.agents.shuffle_do("step")
# Collect data for visualization
self.datacollector.collect(self)
# Compute the new Gini coefficient
new_gini = compute_gini(self)
new_gini = self.compute_gini()
# Compute the reward based on the change in Gini coefficient
reward = self.calculate_reward(new_gini)
self.prev_gini = new_gini
# Get the observation for the RL model
obs = self._get_obs()
if self.schedule.time > 5 * NUM_AGENTS:
if self.time > 5 * NUM_AGENTS:
# Terminate the episode if the model has run for a certain number of timesteps
done = True
reward = -1
elif new_gini < 0.1:
# Terminate the episode if the Gini coefficient is below a certain threshold
done = True
reward = 50 / self.schedule.time
reward = 50 / self.time
else:
done = False
info = {}
@@ -142,20 +140,18 @@ def reset(self, *, seed=None, options=None):
self.visualize()
super().reset()
self.grid = mesa.space.MultiGrid(self.grid.width, self.grid.height, True)
self.schedule = mesa.time.RandomActivation(self)
self.remove_all_agents()
for i in range(self.num_agents):
# Create MoneyAgentRL instances and add them to the schedule
a = MoneyAgentRL(i, self)
self.schedule.add(a)
a = MoneyAgentRL(self)
x = self.random.randrange(self.grid.width)
y = self.random.randrange(self.grid.height)
self.grid.place_agent(a, (x, y))
self.prev_gini = compute_gini(self)
self.prev_gini = self.compute_gini()
return self._get_obs(), {}

def _get_obs(self):
# The observation is the wealth of each agent and their position
obs = []
for a in self.schedule.agents:
obs.append([a.wealth, *list(a.pos)])
obs = [[a.wealth, *a.pos] for a in self.agents]
obs = np.array(obs)
return np.array(obs)
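The notebook earlier points to this Boltzmann Money model as the example where Stable-Baselines, rather than RLlib, is used because every agent shares one policy. As a minimal sketch under that assumption (this is not the repository's actual training script, and the grid size and timestep budget are illustrative), the `gymnasium.Env` interface exposed by `BoltzmannWealthModelRL` can be driven by Stable-Baselines3 PPO roughly like this:

```python
# Hedged sketch: single shared policy trained with Stable-Baselines3 PPO.
# Assumes it is run from rl/boltzmann_money/ so that `model.py` is importable.
from stable_baselines3 import PPO

from model import BoltzmannWealthModelRL

env = BoltzmannWealthModelRL(n=10, width=10, height=10)  # sizes are illustrative
agent = PPO("MlpPolicy", env, verbose=1)  # PPO handles the MultiDiscrete action space
agent.learn(total_timesteps=10_000)       # illustrative training budget
agent.save("ppo_boltzmann_money")         # placeholder output path
```

RLlib remains the better fit when different agent types need different policies, as in the Epstein Civil Violence files below.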
5 changes: 2 additions & 3 deletions rl/epstein_civil_violence/agent.py
@@ -1,6 +1,5 @@
from mesa_models.epstein_civil_violence.agent import Citizen, Cop

from .utility import move
from mesa.examples.advanced.epstein_civil_violence.agents import Citizen, Cop
from utility import move


class CitizenRL(Citizen):
27 changes: 12 additions & 15 deletions rl/epstein_civil_violence/model.py
@@ -1,11 +1,10 @@
import gymnasium as gym
import mesa
import numpy as np
from mesa_models.epstein_civil_violence.model import EpsteinCivilViolence
from agent import CitizenRL, CopRL
from mesa.examples.advanced.epstein_civil_violence.model import EpsteinCivilViolence
from ray.rllib.env import MultiAgentEnv

from .agent import CitizenRL, CopRL
from .utility import create_intial_agents, grid_to_observation
from utility import create_intial_agents, grid_to_observation


class EpsteinCivilViolenceRL(EpsteinCivilViolence, MultiAgentEnv):
@@ -88,7 +87,7 @@ def step(self, action_dict):
self.action_dict = action_dict

# Step the model
self.schedule.step()
self.agents.shuffle_do("step")
self.datacollector.collect(self)

# Calculate rewards
@@ -104,10 +103,10 @@ def step(self, action_dict):
] # Get the values from the observation grid for the neighborhood cells

# RL specific outputs for the environment
done = {a.unique_id: False for a in self.schedule.agents}
truncated = {a.unique_id: False for a in self.schedule.agents}
done = {a.unique_id: False for a in self.agents}
truncated = {a.unique_id: False for a in self.agents}
truncated["__all__"] = np.all(list(truncated.values()))
if self.schedule.time > self.max_iters:
if self.time > self.max_iters:
done["__all__"] = True
else:
done["__all__"] = False
@@ -116,7 +115,7 @@

def cal_reward(self):
rewards = {}
for agent in self.schedule.agents:
for agent in self.agents:
if isinstance(agent, CopRL):
if agent.arrest_made:
# Cop is rewarded for making an arrest
@@ -149,19 +148,17 @@ def reset(self, *, seed=None, options=None):
"""

super().reset()
# Using base scheduler to maintain the order of agents
self.schedule = mesa.time.BaseScheduler(self)
self.grid = mesa.space.SingleGrid(self.width, self.height, torus=True)
create_intial_agents(self, CitizenRL, CopRL)
grid_to_observation(self, CitizenRL)
# Intialize action dictionary with no action
self.action_dict = {a.unique_id: (0, 0) for a in self.schedule.agents}
self.action_dict = {a.unique_id: (0, 0) for a in self.agents}
# Update neighbors for observation space
for agent in self.schedule.agents:
for agent in self.agents:
agent.update_neighbors()
self.schedule.step()
self.agents.shuffle_do("step")
observation = {}
for agent in self.schedule.agents:
for agent in self.agents:
observation[agent.unique_id] = [
self.obs_grid[neighbor[0]][neighbor[1]]
for neighbor in agent.neighborhood
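The edits above all apply the same Mesa 3.0 migration: the explicit scheduler is dropped, `self.schedule.step()` becomes `self.agents.shuffle_do("step")`, `self.schedule.agents` becomes `self.agents`, and `self.schedule.time` becomes `self.time`. A small self-contained sketch of that pattern, assuming Mesa >= 3.0 and using hypothetical class names (not code from this PR):

```python
# Minimal sketch of the scheduler-to-AgentSet pattern used throughout this PR.
import mesa


class WalkerAgent(mesa.Agent):
    """Illustrative agent; in Mesa 3.x agents take only the model."""

    def step(self):
        self.model.visits += 1


class DemoModel(mesa.Model):
    def __init__(self, n=5, seed=None):
        super().__init__(seed=seed)
        self.visits = 0
        for _ in range(n):
            WalkerAgent(self)  # agents self-register; no schedule.add() needed

    def step(self):
        self.agents.shuffle_do("step")  # replaces self.schedule.step()


model = DemoModel()
model.step()
print(len(model.agents), model.visits)  # 5 agents stepped once each
```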
3 changes: 1 addition & 2 deletions rl/epstein_civil_violence/train_config.py
@@ -1,10 +1,9 @@
import os

from model import EpsteinCivilViolenceRL
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.policy.policy import PolicySpec

from .model import EpsteinCivilViolenceRL


# Configuration for the PPO algorithm
# You can change the configuration as per your requirements
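The body of the PPO configuration is collapsed out of this diff, so the following is only a hedged sketch of how `PPOConfig` and `PolicySpec` are typically combined for per-agent-type policies; the actual policy ids, mapping function, and hyperparameters in `train_config.py` are not shown here and are assumptions.

```python
# Hedged sketch of a multi-agent PPO config; policy names and the agent-id
# scheme are assumptions, not the file's real contents.
from model import EpsteinCivilViolenceRL
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.policy.policy import PolicySpec

config = (
    PPOConfig()
    .environment(env=EpsteinCivilViolenceRL)
    .framework("torch")
    .multi_agent(
        policies={"policy_cop": PolicySpec(), "policy_citizen": PolicySpec()},
        policy_mapping_fn=lambda agent_id, *args, **kwargs: (
            "policy_cop" if str(agent_id).startswith("Cop") else "policy_citizen"
        ),
    )
)
```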
4 changes: 2 additions & 2 deletions rl/epstein_civil_violence/utility.py
@@ -30,9 +30,9 @@ def create_intial_agents(self, CitizenRL, CopRL):
# Initializing cops then citizens
# This ensures cops act out their step before citizens
for cop in cops:
self.schedule.add(cop)
self.add(cop)
for citizen in citizens:
self.schedule.add(citizen)
self.add(citizen)


def grid_to_observation(self, CitizenRL):