From 9d8f69d8f4ed3e8544f0b160e1d19ee4716a283e Mon Sep 17 00:00:00 2001 From: Tom Pike Date: Thu, 14 Nov 2024 03:24:15 -0500 Subject: [PATCH 1/3] mesa 3, rl wolf-sheep - wolfsheep updates to 3 --- .gitignore | 4 + .../GoL_fast_screenshot.png | Bin .../conways_game_of_life_fast/Readme.md | 0 .../conways_game_of_life_fast/app.py | 0 .../conways_game_of_life_fast/model.py | 0 rl/README.md | 9 +- rl/Tutorials.ipynb | 85 +++++--- rl/epstein_civil_violence/agent.py | 2 +- rl/epstein_civil_violence/model.py | 2 +- rl/wolf_sheep/agents.py | 15 +- rl/wolf_sheep/app.py | 131 ++++++++++++ rl/wolf_sheep/model.py | 47 +++-- rl/wolf_sheep/server.py | 190 ------------------ rl/wolf_sheep/train_config.py | 3 +- rl/wolf_sheep/utility.py | 6 +- 15 files changed, 235 insertions(+), 259 deletions(-) rename examples/{ => hex_snowflake}/conways_game_of_life_fast/GoL_fast_screenshot.png (100%) rename examples/{ => hex_snowflake}/conways_game_of_life_fast/Readme.md (100%) rename examples/{ => hex_snowflake}/conways_game_of_life_fast/app.py (100%) rename examples/{ => hex_snowflake}/conways_game_of_life_fast/model.py (100%) create mode 100644 rl/wolf_sheep/app.py delete mode 100644 rl/wolf_sheep/server.py diff --git a/.gitignore b/.gitignore index c7984074..995731ed 100644 --- a/.gitignore +++ b/.gitignore @@ -23,6 +23,9 @@ var/ .installed.cfg *.egg +# ignore RL file - users download model on own +rl + # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. @@ -58,6 +61,7 @@ target/ # Jupyter and iPython notebook checkpoints *.ipynb_checkpoints +*.virtual_documents # Spyder app workspace config file .spyderworkspace diff --git a/examples/conways_game_of_life_fast/GoL_fast_screenshot.png b/examples/hex_snowflake/conways_game_of_life_fast/GoL_fast_screenshot.png similarity index 100% rename from examples/conways_game_of_life_fast/GoL_fast_screenshot.png rename to examples/hex_snowflake/conways_game_of_life_fast/GoL_fast_screenshot.png diff --git a/examples/conways_game_of_life_fast/Readme.md b/examples/hex_snowflake/conways_game_of_life_fast/Readme.md similarity index 100% rename from examples/conways_game_of_life_fast/Readme.md rename to examples/hex_snowflake/conways_game_of_life_fast/Readme.md diff --git a/examples/conways_game_of_life_fast/app.py b/examples/hex_snowflake/conways_game_of_life_fast/app.py similarity index 100% rename from examples/conways_game_of_life_fast/app.py rename to examples/hex_snowflake/conways_game_of_life_fast/app.py diff --git a/examples/conways_game_of_life_fast/model.py b/examples/hex_snowflake/conways_game_of_life_fast/model.py similarity index 100% rename from examples/conways_game_of_life_fast/model.py rename to examples/hex_snowflake/conways_game_of_life_fast/model.py diff --git a/rl/README.md b/rl/README.md index edb45617..88ee5f27 100644 --- a/rl/README.md +++ b/rl/README.md @@ -14,25 +14,28 @@ This repository demonstrates various applications of reinforcement learning (RL) 1. **Install Mesa Models** Begin by installing the Mesa models: +#TODO: Update this -- do release? + ```bash pip install -U -e git+https://github.com/projectmesa/mesa-examples@mesa-2.x#egg=mesa-models ``` -2. **Install RLlib for Multi-Agent Training** +3. **Install RLlib for Multi-Agent Training** Next, install RLlib along with TensorFlow and PyTorch to support multi-agent training algorithms: ```bash pip install "ray[rllib]" tensorflow torch ``` +#TODO Update requirements to mesa[rec] >3.0 -3. 
**Install Additional Dependencies** +4. **Install Additional Dependencies** Finally, install any remaining dependencies: ```bash pip install -r requirements.txt ``` -4. **Download Pre-Trained Weights** +5. **Download Pre-Trained Weights** Download pre-trained weights from hugging face: ```bash diff --git a/rl/Tutorials.ipynb b/rl/Tutorials.ipynb index e768fc3b..7e01a1d8 100644 --- a/rl/Tutorials.ipynb +++ b/rl/Tutorials.ipynb @@ -6,24 +6,50 @@ "source": [ "# Tutorial: Reinforcement Learning with Mesa Environments\n", "\n", - "# Welcome to this comprehensive guide on integrating reinforcement learning (RL) with Mesa environments. \n", - "# Mesa, an agent-based modeling framework, offers an excellent platform to experiment with RL algorithms. \n", - "# In this tutorial, we'll explore several examples of how RL can be applied to various Mesa environments, \n", - "# starting with the **Epstein Civil Violence model**.\n", - "\n", - "# ## Getting Started\n", + "Welcome to this comprehensive guide on integrating reinforcement learning (RL) with Mesa environments. \n", + "Mesa, an agent-based modeling framework, offers an excellent platform to experiment with RL algorithms. \n", + "In this tutorial, we'll explore several examples of how RL can be applied to various Mesa environments, \n", + "starting with the **Epstein Civil Violence model**." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting Started\n", "\n", - "# Before diving into the implementation, take a moment to familiarize yourself with the Epstein Civil Violence model.\n", - "# This will give you a solid understanding of the environment we’ll be working with.\n", + "Before diving into the implementation, take a moment to familiarize yourself with the Epstein Civil Violence model.\n", + "This will give you a solid understanding of the environment we’ll be working with.\n", "\n", - "# Next, ensure all dependencies are installed by following the instructions in the `README.md`.\n" + "Next, ensure all dependencies are installed by following the instructions in the `README.md`." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From C:\\Users\\thoma\\miniconda3\\envs\\mesa_dev\\Lib\\site-packages\\ray\\rllib\\utils\\framework.py:130: The name tf.logging.set_verbosity is deprecated. 
Please use tf.compat.v1.logging.set_verbosity instead.\n", + "\n" + ] + }, + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'mesa_models.epstein_civil_violence'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[1], line 5\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[38;5;66;03m# ### Step 1: Importing the Necessary Modules\u001b[39;00m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;66;03m# To begin, let’s import the required modules for the Epstein Civil Violence model:\u001b[39;00m\n\u001b[0;32m 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mepstein_civil_violence\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodel\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m EpsteinCivilViolenceRL\n\u001b[1;32m----> 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mepstein_civil_violence\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mserver\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m run_model\n\u001b[0;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mepstein_civil_violence\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtrain_config\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m config\n\u001b[0;32m 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtrain\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m train_model\n", + "File \u001b[1;32m~\\Documents\\GitHub\\dev\\mesa-examples\\rl\\epstein_civil_violence\\server.py:4\u001b[0m\n\u001b[0;32m 2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[0;32m 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mray\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmesa_models\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mepstein_civil_violence\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mportrayal\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m citizen_cop_portrayal\n\u001b[0;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mray\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m tune\n\u001b[0;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mray\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mrllib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01malgorithms\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01malgorithm\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Algorithm\n", + "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'mesa_models.epstein_civil_violence'" + ] + } + ], "source": [ "# ### Step 1: Importing the Necessary Modules\n", "# To begin, let’s import the required modules for the Epstein Civil Violence model:\n", @@ -122,37 +148,42 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# ### Alternative Approach: Using Stable-Baselines with Mesa\n", + "# Alternative Approach: Using Stable-Baselines with Mesa\n", "\n", - "# In the example above, we utilized RLlib to integrate reinforcement learning algorithms with the Mesa environment, \n", - "# which is particularly useful when you want different policies for different agents. 
\n", - "# However, if your use case requires a simpler setup where all agents follow the same policy, \n", - "# you can opt for Stable-Baselines. An example of integrating Stable-Baselines with Mesa can be found in the Boltzmann Money model.\n", + "In the example above, we utilized RLlib to integrate reinforcement learning algorithms with the Mesa environment, which is particularly useful when you want different policies for different agents. \n", + "However, if your use case requires a simpler setup where all agents follow the same policy, you can opt for Stable-Baselines. An example of integrating Stable-Baselines with Mesa can be found in the Boltzmann Money model.\n", "\n", - "# You can explore more on how to use Stable-Baselines with Mesa by following the respective documentation.\n" + "You can explore more on how to use Stable-Baselines with Mesa by following the respective documentation.\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "# ### Implementing Your Own Cases\n", + "# Implementing Your Own RL Models\n", "\n", - "# If you're ready to explore RL in different agent-based scenarios, you can start by experimenting with various examples we provide at Mesa-Examples:\n", - "# Link: https://github.com/projectmesa/mesa-examples\n", + "If you're ready to explore RL in different agent-based scenarios, you can start by experimenting with various examples we provide at Mesa-Examples:\n", + "Link: https://github.com/projectmesa/mesa-examples\n", "\n", - "# These examples cover a range of scenarios and offer a great starting point for understanding how to apply RL within Mesa environments.\n", + "These examples cover a range of scenarios and offer a great starting point for understanding how to apply RL within Mesa environments.\n", "\n", - "# If you have your own scenario in mind, you can create it as a Mesa model by following this series of Tutorials:\n", - "# Link: https://mesa.readthedocs.io/en/stable/tutorials/intro_tutorial.html\n", + "If you have your own scenario in mind, you can create it as a Mesa model by following this series of Tutorials:\n", + "Link: https://mesa.readthedocs.io/en/stable/tutorials/intro_tutorial.html\n", "\n", - "# Once your scenario is set up as a Mesa model, you can refer to the code in the provided implementations to see how the RL components are built on top of the respective Mesa models.\n" + "Once your scenario is set up as a Mesa model, you can refer to the code in the provided implementations to see how the RL components are built on top of the respective Mesa models.\n" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "test", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -166,9 +197,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.0" + "version": "3.12.5" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/rl/epstein_civil_violence/agent.py b/rl/epstein_civil_violence/agent.py index fb8e2d0d..e652321c 100644 --- a/rl/epstein_civil_violence/agent.py +++ b/rl/epstein_civil_violence/agent.py @@ -1,4 +1,4 @@ -from mesa_models.epstein_civil_violence.agent import Citizen, Cop +from mesa.examples.advanced.epstein_civil_violence.agents import Citizen, Cop from .utility import move diff --git a/rl/epstein_civil_violence/model.py b/rl/epstein_civil_violence/model.py index c061e9c9..f594a094 100644 --- 
a/rl/epstein_civil_violence/model.py +++ b/rl/epstein_civil_violence/model.py @@ -1,7 +1,7 @@ import gymnasium as gym import mesa import numpy as np -from mesa_models.epstein_civil_violence.model import EpsteinCivilViolence +from mesa.examples.advanced.epstein_civil_violence.model import EpsteinCivilViolence from ray.rllib.env import MultiAgentEnv from .agent import CitizenRL, CopRL diff --git a/rl/wolf_sheep/agents.py b/rl/wolf_sheep/agents.py index c6e5e959..90e8ee81 100644 --- a/rl/wolf_sheep/agents.py +++ b/rl/wolf_sheep/agents.py @@ -1,6 +1,5 @@ -from mesa_models.wolf_sheep.agents import GrassPatch, Sheep, Wolf - -from .utility import move +from mesa.examples.advanced.wolf_sheep.agents import GrassPatch, Sheep, Wolf +from utility import move class SheepRL(Sheep): @@ -28,7 +27,7 @@ def step(self): # Death if self.energy < 0: self.model.grid.remove_agent(self) - self.model.schedule.remove(self) + self.model.remove(self) living = False if living and self.random.random() < self.model.sheep_reproduce: @@ -38,7 +37,7 @@ def step(self): unique_id_str = f"sheep_{self.model.next_id()}" lamb = SheepRL(unique_id_str, self.pos, self.model, self.moore, self.energy) self.model.grid.place_agent(lamb, self.pos) - self.model.schedule.add(lamb) + self.model.add(lamb) class WolfRL(Wolf): @@ -62,12 +61,12 @@ def step(self): # Kill the sheep self.model.grid.remove_agent(sheep_to_eat) - self.model.schedule.remove(sheep_to_eat) + self.model.remove(sheep_to_eat) # Death or reproduction if self.energy < 0: self.model.grid.remove_agent(self) - self.model.schedule.remove(self) + self.model.remove(self) else: if self.random.random() < self.model.wolf_reproduce: # Create a new wolf cub @@ -77,4 +76,4 @@ def step(self): unique_id_str, self.pos, self.model, self.moore, self.energy ) self.model.grid.place_agent(cub, cub.pos) - self.model.schedule.add(cub) + self.model.add(cub) diff --git a/rl/wolf_sheep/app.py b/rl/wolf_sheep/app.py new file mode 100644 index 00000000..916885c2 --- /dev/null +++ b/rl/wolf_sheep/app.py @@ -0,0 +1,131 @@ +from mesa.examples.advanced.wolf_sheep.agents import GrassPatch, Sheep, Wolf +from mesa.examples.advanced.wolf_sheep.model import WolfSheep +from mesa.experimental.devs import ABMSimulator +from mesa.visualization import ( + Slider, + SolaraViz, + make_plot_component, + make_space_component, +) +from model import WolfSheepRL +from ray import tune +from ray.rllib.algorithms.algorithm import Algorithm + +model_params = { + "width": 20, + "height": 20, + "initial_sheep": 100, + "initial_wolves": 25, + "sheep_reproduce": 0.04, + "wolf_reproduce": 0.05, + "wolf_gain_from_food": 20, + "grass": True, + "grass_regrowth_time": 30, + "sheep_gain_from_food": 4, + "seed": 42, + "simulator": ABMSimulator(), + "vision": 4, + "model_path": None, +} + + +class WolfSheepServer(WolfSheepRL): + def __init__(self, **model_params): + super().__init__(**model_params) + + def env_creator(_): + return WolfSheepRL(**model_params) + + tune.register_env("WorldSheepModel-v0", env_creator) + self.iteration = 0 + # Load the model from checkpoint + checkpoint_path = self.model_path + algo = Algorithm.from_checkpoint(checkpoint_path) + self.wolf_policy = algo.get_policy("policy_wolf") + self.sheep_policy = algo.get_policy("policy_sheep") + + +def wolf_sheep_portrayal(agent): + if agent is None: + return + + portrayal = { + "size": 25, + } + + if isinstance(agent, Wolf): + portrayal["color"] = "tab:red" + portrayal["marker"] = "o" + portrayal["zorder"] = 2 + elif isinstance(agent, Sheep): + portrayal["color"] = 
"tab:cyan" + portrayal["marker"] = "o" + portrayal["zorder"] = 2 + elif isinstance(agent, GrassPatch): + if agent.fully_grown: + portrayal["color"] = "tab:green" + else: + portrayal["color"] = "tab:brown" + portrayal["marker"] = "s" + portrayal["size"] = 75 + + return portrayal + + +model_params = { + "seed": { + "type": "InputText", + "value": 42, + "label": "Random Seed", + }, + "grass": { + "type": "Select", + "value": True, + "values": [True, False], + "label": "grass regrowth enabled?", + }, + "grass_regrowth_time": Slider("Grass Regrowth Time", 20, 1, 50), + "initial_sheep": Slider("Initial Sheep Population", 100, 10, 300), + "sheep_reproduce": Slider("Sheep Reproduction Rate", 0.04, 0.01, 1.0, 0.01), + "initial_wolves": Slider("Initial Wolf Population", 10, 5, 100), + "wolf_reproduce": Slider( + "Wolf Reproduction Rate", + 0.05, + 0.01, + 1.0, + 0.01, + ), + "wolf_gain_from_food": Slider("Wolf Gain From Food Rate", 20, 1, 50), + "sheep_gain_from_food": Slider("Sheep Gain From Food", 4, 1, 10), +} + + +def post_process_space(ax): + ax.set_aspect("equal") + ax.set_xticks([]) + ax.set_yticks([]) + + +def post_process_lines(ax): + ax.legend(loc="center left", bbox_to_anchor=(1, 0.9)) + + +space_component = make_space_component( + wolf_sheep_portrayal, draw_grid=False, post_process=post_process_space +) +lineplot_component = make_plot_component( + {"Wolves": "tab:orange", "Sheep": "tab:cyan", "Grass": "tab:green"}, + post_process=post_process_lines, +) + +simulator = ABMSimulator() +model = WolfSheep(simulator=simulator, grass=True) + +page = SolaraViz( + model, + components=[space_component, lineplot_component], + model_params=model_params, + name="Wolf Sheep", + simulator=simulator, +) +page # noqa diff --git a/rl/wolf_sheep/model.py b/rl/wolf_sheep/model.py index ee580c56..ec09ff59 100644 --- a/rl/wolf_sheep/model.py +++ b/rl/wolf_sheep/model.py @@ -1,13 +1,12 @@ import gymnasium as gym import mesa import numpy as np -from mesa_models.wolf_sheep.agents import GrassPatch -from mesa_models.wolf_sheep.model import WolfSheep -from mesa_models.wolf_sheep.scheduler import RandomActivationByTypeFiltered +from agents import SheepRL, WolfRL +from mesa.examples.advanced.wolf_sheep.agents import GrassPatch +from mesa.examples.advanced.wolf_sheep.model import WolfSheep +from mesa.experimental.devs import ABMSimulator from ray.rllib.env import MultiAgentEnv - -from .agents import SheepRL, WolfRL -from .utility import create_intial_agents, grid_to_observation +from utility import create_intial_agents, grid_to_observation class WolfSheepRL(WolfSheep, MultiAgentEnv): @@ -27,6 +26,8 @@ def __init__( grass=True, grass_regrowth_time=30, sheep_gain_from_food=4, + seed=42, + simulator=ABMSimulator(), vision=4, ): """ @@ -43,7 +44,10 @@ def __init__( grass, grass_regrowth_time, sheep_gain_from_food, + seed, + simulator, ) + # Defining RL specific attributes self.vision = vision # The observation space is a dictionary containing the grid and energy of the agent @@ -62,17 +66,17 @@ def __init__( self.max_steps = 500 self.datacollector = mesa.DataCollector( { - "Wolves": lambda m: m.schedule.get_type_count(WolfRL), - "Sheep": lambda m: m.schedule.get_type_count(SheepRL), - "Grass": lambda m: m.schedule.get_type_count( - GrassPatch, lambda x: x.fully_grown + "Wolves": lambda m: len(m.agents_by_type[WolfRL]), + "Sheep": lambda m: len(m.agents_by_type[SheepRL]), + "Grass": lambda m: len( + m.agents_by_type[GrassPatch].select(lambda a: a.fully_grown) ), } ) def step(self, action_dict): self.action_dict = 
action_dict - self.schedule.step() + self.agents.shuffle_do("step") self.datacollector.collect(self) # Get rewards @@ -82,7 +86,7 @@ def step(self, action_dict): # We convert grid to a matrix and then neighbors of each agent is extracted grid_to_observation(self, SheepRL, WolfRL, GrassPatch) obs = {} - for agent in self.schedule.agents: + for agent in self.agents: if isinstance(agent, (SheepRL, WolfRL)): neighbors = agent.model.grid.get_neighborhood( agent.pos, moore=True, radius=self.vision @@ -100,16 +104,14 @@ def step(self, action_dict): # Either time finishes or either wolves or sheep are extinct done = { - a.unique_id: False - for a in self.schedule.agents - if isinstance(a, (SheepRL, WolfRL)) + a.unique_id: False for a in self.agents if isinstance(a, (SheepRL, WolfRL)) } # Check if either wolves or sheep are extinct if ( - self.schedule.get_type_count(WolfRL) == 0 - or self.schedule.get_type_count(SheepRL) == 0 - or self.schedule.time > self.max_steps + self.agents["WolfRL"] == 0 + or self.agents["SheepRL"] == 0 + or self.time > self.max_steps ): done["__all__"] = True else: @@ -117,9 +119,7 @@ def step(self, action_dict): # Prepare info dictionary truncated = { - a.unique_id: False - for a in self.schedule.agents - if isinstance(a, (SheepRL, WolfRL)) + a.unique_id: False for a in self.agents if isinstance(a, (SheepRL, WolfRL)) } truncated["__all__"] = np.all(list(truncated.values())) @@ -143,7 +143,7 @@ def cal_reward(self): rewards = {} # Calculate rewards # Agents are rewarded for being alive and having energy - for agent in self.schedule.agents: + for agent in self.agents: if isinstance(agent, (SheepRL, WolfRL)): if isinstance(agent, SheepRL): rewards[agent.unique_id] = min(4, agent.energy - 4) @@ -154,13 +154,12 @@ def cal_reward(self): def reset(self, *, seed=None, options=None): # Reset your environment here super().reset() - self.schedule = RandomActivationByTypeFiltered(self) self.grid = mesa.space.MultiGrid(self.width, self.height, torus=True) self.current_id = 0 create_intial_agents(self, SheepRL, WolfRL, GrassPatch) grid_to_observation(self, SheepRL, WolfRL, GrassPatch) obs = {} - for agent in self.schedule.agents: + for agent in self.agents: if isinstance(agent, (SheepRL, WolfRL)): neighbors = agent.model.grid.get_neighborhood( agent.pos, moore=True, radius=self.vision diff --git a/rl/wolf_sheep/server.py b/rl/wolf_sheep/server.py deleted file mode 100644 index f21c54c0..00000000 --- a/rl/wolf_sheep/server.py +++ /dev/null @@ -1,190 +0,0 @@ -import os - -import mesa -import numpy as np -from mesa_models.wolf_sheep.agents import GrassPatch -from ray import tune -from ray.rllib.algorithms.algorithm import Algorithm - -from .agents import SheepRL, WolfRL -from .model import WolfSheepRL -from .utility import grid_to_observation - - -class WolfSheepServer(WolfSheepRL): - def __init__( - self, - width=20, - height=20, - initial_sheep=100, - initial_wolves=25, - sheep_reproduce=0.04, - wolf_reproduce=0.05, - wolf_gain_from_food=20, - grass=True, - grass_regrowth_time=30, - sheep_gain_from_food=4, - model_path=None, - ): - super().__init__( - width, - height, - initial_sheep, - initial_wolves, - sheep_reproduce, - wolf_reproduce, - wolf_gain_from_food, - grass, - grass_regrowth_time, - sheep_gain_from_food, - ) - - def env_creator(_): - return WolfSheepRL( - width, - height, - initial_sheep, - initial_wolves, - sheep_reproduce, - wolf_reproduce, - wolf_gain_from_food, - grass, - grass_regrowth_time, - sheep_gain_from_food, - ) - - tune.register_env("WorldSheepModel-v0", 
env_creator) - self.iteration = 0 - # Load the model from checkpoint - checkpoint_path = model_path - algo = Algorithm.from_checkpoint(checkpoint_path) - self.wolf_policy = algo.get_policy("policy_wolf") - self.sheep_policy = algo.get_policy("policy_sheep") - - def step(self): - if self.iteration == 0: - self.reset() - self.datacollector.collect(self) - # Get the observation for each agent - grid_to_observation(self, SheepRL, WolfRL, GrassPatch) - obs = {} - for agent in self.schedule.agents: - if isinstance(agent, (SheepRL, WolfRL)): - neighbors = agent.model.grid.get_neighborhood( - agent.pos, moore=True, radius=self.vision - ) - obs[agent.unique_id] = { - "grid": np.array( - [ - self.obs_grid[neighbor[0]][neighbor[1]] - for neighbor in neighbors - ] - ), - "energy": np.array([agent.energy]), - } - action_dict = {} - # Get the action for each agent - for agent in self.schedule.agents: - if isinstance(agent, SheepRL): - action_dict[agent.unique_id] = self.sheep_policy.compute_single_action( - obs[agent.unique_id], explore=False - )[0] - elif isinstance(agent, WolfRL): - action_dict[agent.unique_id] = self.wolf_policy.compute_single_action( - obs[agent.unique_id], explore=False - )[0] - self.action_dict = action_dict - # Take a step in the environment - self.schedule.step() - self.iteration += 1 - if ( - self.schedule.get_type_count(WolfRL) == 0 - or self.schedule.get_type_count(SheepRL) == 0 - or self.schedule.time > self.max_steps - ): - self.running = False - - -def wolf_sheep_portrayal(agent): - if agent is None: - return - - portrayal = {} - file_path = os.path.dirname(os.path.abspath(__file__)) - resources_path = os.path.join(file_path, "resources") - - if type(agent) is SheepRL: - portrayal["Shape"] = os.path.join(resources_path, "sheep.png") - portrayal["scale"] = 0.9 - portrayal["Layer"] = 1 - - elif type(agent) is WolfRL: - portrayal["Shape"] = os.path.join(resources_path, "wolf.png") - portrayal["scale"] = 0.9 - portrayal["Layer"] = 2 - portrayal["text"] = round(agent.energy, 1) - portrayal["text_color"] = "White" - - elif type(agent) is GrassPatch: - portrayal["Color"] = ( - ["#00FF00", "#00CC00", "#009900"] - if agent.fully_grown - else ["#84e184", "#adebad", "#d6f5d6"] - ) - portrayal["Shape"] = "rect" - portrayal["Filled"] = "true" - portrayal["Layer"] = 0 - portrayal["w"] = 1 - portrayal["h"] = 1 - return portrayal - - -canvas_element = mesa.visualization.CanvasGrid(wolf_sheep_portrayal, 20, 20, 500, 500) -chart_element = mesa.visualization.ChartModule( - [ - {"Label": "Wolves", "Color": "#AA0000"}, - {"Label": "Sheep", "Color": "#666666"}, - {"Label": "Grass", "Color": "#00AA00"}, - ] -) - -model_params = { - "height": 20, - "width": 20, - "model_path": None, - "title": mesa.visualization.StaticText("Parameters:"), - "grass": mesa.visualization.Checkbox("Grass Enabled", True), - "grass_regrowth_time": mesa.visualization.Slider("Grass Regrowth Time", 20, 1, 50), - "initial_sheep": mesa.visualization.Slider( - "Initial Sheep Population", 100, 10, 300 - ), - "sheep_reproduce": mesa.visualization.Slider( - "Sheep Reproduction Rate", 0.04, 0.01, 1.0, 0.01 - ), - "initial_wolves": mesa.visualization.Slider("Initial Wolf Population", 25, 10, 300), - "wolf_reproduce": mesa.visualization.Slider( - "Wolf Reproduction Rate", - 0.05, - 0.01, - 1.0, - 0.01, - description="The rate at which wolf agents reproduce.", - ), - "wolf_gain_from_food": mesa.visualization.Slider( - "Wolf Gain From Food Rate", 20, 1, 50 - ), - "sheep_gain_from_food": mesa.visualization.Slider("Sheep Gain From 
Food", 4, 1, 10), -} - - -def run_model(height=20, width=20, model_path=None): - model_params["height"] = height - model_params["width"] = width - model_params["model_path"] = model_path - server = mesa.visualization.ModularServer( - WolfSheepServer, - [canvas_element, chart_element], - "Wolf Sheep Predation", - model_params, - ) - return server diff --git a/rl/wolf_sheep/train_config.py b/rl/wolf_sheep/train_config.py index f3c4fdb7..c001f04a 100644 --- a/rl/wolf_sheep/train_config.py +++ b/rl/wolf_sheep/train_config.py @@ -1,10 +1,9 @@ import os +from model import WolfSheepRL from ray.rllib.algorithms.ppo import PPOConfig from ray.rllib.policy.policy import PolicySpec -from .model import WolfSheepRL - # Configuration to train the model # Feel free to adjust the configuration as necessary diff --git a/rl/wolf_sheep/utility.py b/rl/wolf_sheep/utility.py index b65a49ee..40d140d4 100644 --- a/rl/wolf_sheep/utility.py +++ b/rl/wolf_sheep/utility.py @@ -7,7 +7,7 @@ def create_intial_agents(self, SheepRL, WolfRL, GrassPatch): unique_id_str = f"sheep_{self.next_id()}" sheep = SheepRL(unique_id_str, None, self, True, energy) self.grid.place_agent(sheep, (x, y)) - self.schedule.add(sheep) + self.add(sheep) # Create wolves for i in range(self.initial_wolves): @@ -17,7 +17,7 @@ def create_intial_agents(self, SheepRL, WolfRL, GrassPatch): unique_id_str = f"wolf_{self.next_id()}" wolf = WolfRL(unique_id_str, None, self, True, energy) self.grid.place_agent(wolf, (x, y)) - self.schedule.add(wolf) + self.add(wolf) # Create grass patches if self.grass: @@ -32,7 +32,7 @@ def create_intial_agents(self, SheepRL, WolfRL, GrassPatch): unique_id_str = f"grass_{self.next_id()}" patch = GrassPatch(unique_id_str, None, self, fully_grown, countdown) self.grid.place_agent(patch, (x, y)) - self.schedule.add(patch) + self.add(patch) def move(self, action): From 236e4518046880797a7fb9cf296c2681c7c1e5d1 Mon Sep 17 00:00:00 2001 From: Tom Pike Date: Thu, 14 Nov 2024 03:42:45 -0500 Subject: [PATCH 2/3] update epstein 3 for rl - update epstein rl for mesa 3.0 --- rl/epstein_civil_violence/agent.py | 3 +-- rl/epstein_civil_violence/model.py | 25 ++++++++++------------- rl/epstein_civil_violence/train_config.py | 3 +-- rl/epstein_civil_violence/utility.py | 4 ++-- 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/rl/epstein_civil_violence/agent.py b/rl/epstein_civil_violence/agent.py index e652321c..c693788a 100644 --- a/rl/epstein_civil_violence/agent.py +++ b/rl/epstein_civil_violence/agent.py @@ -1,6 +1,5 @@ from mesa.examples.advanced.epstein_civil_violence.agents import Citizen, Cop - -from .utility import move +from utility import move class CitizenRL(Citizen): diff --git a/rl/epstein_civil_violence/model.py b/rl/epstein_civil_violence/model.py index f594a094..78b21e30 100644 --- a/rl/epstein_civil_violence/model.py +++ b/rl/epstein_civil_violence/model.py @@ -1,11 +1,10 @@ import gymnasium as gym import mesa import numpy as np +from agent import CitizenRL, CopRL from mesa.examples.advanced.epstein_civil_violence.model import EpsteinCivilViolence from ray.rllib.env import MultiAgentEnv - -from .agent import CitizenRL, CopRL -from .utility import create_intial_agents, grid_to_observation +from utility import create_intial_agents, grid_to_observation class EpsteinCivilViolenceRL(EpsteinCivilViolence, MultiAgentEnv): @@ -88,7 +87,7 @@ def step(self, action_dict): self.action_dict = action_dict # Step the model - self.schedule.step() + self.agents.shuffle_do("step") self.datacollector.collect(self) # 
Calculate rewards @@ -104,10 +103,10 @@ def step(self, action_dict): ] # Get the values from the observation grid for the neighborhood cells # RL specific outputs for the environment - done = {a.unique_id: False for a in self.schedule.agents} - truncated = {a.unique_id: False for a in self.schedule.agents} + done = {a.unique_id: False for a in self.agents} + truncated = {a.unique_id: False for a in self.agents} truncated["__all__"] = np.all(list(truncated.values())) - if self.schedule.time > self.max_iters: + if self.time > self.max_iters: done["__all__"] = True else: done["__all__"] = False @@ -116,7 +115,7 @@ def step(self, action_dict): def cal_reward(self): rewards = {} - for agent in self.schedule.agents: + for agent in self.agents: if isinstance(agent, CopRL): if agent.arrest_made: # Cop is rewarded for making an arrest @@ -149,19 +148,17 @@ def reset(self, *, seed=None, options=None): """ super().reset() - # Using base scheduler to maintain the order of agents - self.schedule = mesa.time.BaseScheduler(self) self.grid = mesa.space.SingleGrid(self.width, self.height, torus=True) create_intial_agents(self, CitizenRL, CopRL) grid_to_observation(self, CitizenRL) # Intialize action dictionary with no action - self.action_dict = {a.unique_id: (0, 0) for a in self.schedule.agents} + self.action_dict = {a.unique_id: (0, 0) for a in self.agents} # Update neighbors for observation space - for agent in self.schedule.agents: + for agent in self.agents: agent.update_neighbors() - self.schedule.step() + self.agents.shuffle_do("step") observation = {} - for agent in self.schedule.agents: + for agent in self.agents: observation[agent.unique_id] = [ self.obs_grid[neighbor[0]][neighbor[1]] for neighbor in agent.neighborhood diff --git a/rl/epstein_civil_violence/train_config.py b/rl/epstein_civil_violence/train_config.py index f226f69d..9b0e90ae 100644 --- a/rl/epstein_civil_violence/train_config.py +++ b/rl/epstein_civil_violence/train_config.py @@ -1,10 +1,9 @@ import os +from model import EpsteinCivilViolenceRL from ray.rllib.algorithms.ppo import PPOConfig from ray.rllib.policy.policy import PolicySpec -from .model import EpsteinCivilViolenceRL - # Configuration for the PPO algorithm # You can change the configuration as per your requirements diff --git a/rl/epstein_civil_violence/utility.py b/rl/epstein_civil_violence/utility.py index 0da17e20..a2f0e876 100644 --- a/rl/epstein_civil_violence/utility.py +++ b/rl/epstein_civil_violence/utility.py @@ -30,9 +30,9 @@ def create_intial_agents(self, CitizenRL, CopRL): # Initializing cops then citizens # This ensures cops act out their step before citizens for cop in cops: - self.schedule.add(cop) + self.add(cop) for citizen in citizens: - self.schedule.add(citizen) + self.add(citizen) def grid_to_observation(self, CitizenRL): From 6556803c9059ce635cd6ba6c6ad0c63a5710cd39 Mon Sep 17 00:00:00 2001 From: Tom Pike Date: Thu, 14 Nov 2024 05:34:58 -0500 Subject: [PATCH 3/3] update boltmann rl for Mesa 3 - updated boltzmann_rl for Mesa 3.0 - creating duplicate agents for some reason; need ot reset the unique_id iterator --- rl/boltzmann_money/model.py | 44 +++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/rl/boltzmann_money/model.py b/rl/boltzmann_money/model.py index 75479b0b..fdee254a 100644 --- a/rl/boltzmann_money/model.py +++ b/rl/boltzmann_money/model.py @@ -19,19 +19,16 @@ # Import necessary libraries import numpy as np import seaborn as sns -from mesa_models.boltzmann_wealth_model.model import ( - 
BoltzmannWealthModel, - MoneyAgent, - compute_gini, -) +from mesa.examples.basic.boltzmann_wealth_model.agents import MoneyAgent +from mesa.examples.basic.boltzmann_wealth_model.model import BoltzmannWealth NUM_AGENTS = 10 # Define the agent class class MoneyAgentRL(MoneyAgent): - def __init__(self, unique_id, model): - super().__init__(unique_id, model) + def __init__(self, model): + super().__init__(model) self.wealth = np.random.randint(1, NUM_AGENTS) def move(self, action): @@ -74,7 +71,8 @@ def take_money(self): def step(self): # Get the action for the agent - action = self.model.action_dict[self.unique_id] + # TODO: figure out why agents are being made twice + action = self.model.action_dict[self.unique_id - 11] # Move the agent based on the action self.move(action) # Take money from other agents in the same cell @@ -82,37 +80,37 @@ def step(self): # Define the model class -class BoltzmannWealthModelRL(BoltzmannWealthModel, gymnasium.Env): - def __init__(self, N, width, height): - super().__init__(N, width, height) +class BoltzmannWealthModelRL(BoltzmannWealth, gymnasium.Env): + def __init__(self, n, width, height): + super().__init__(n, width, height) # Define the observation and action space for the RL model # The observation space is the wealth of each agent and their position - self.observation_space = gymnasium.spaces.Box(low=0, high=10 * N, shape=(N, 3)) + self.observation_space = gymnasium.spaces.Box(low=0, high=10 * n, shape=(n, 3)) # The action space is a MultiDiscrete space with 5 possible actions for each agent - self.action_space = gymnasium.spaces.MultiDiscrete([5] * N) + self.action_space = gymnasium.spaces.MultiDiscrete([5] * n) self.is_visualize = False def step(self, action): self.action_dict = action # Perform one step of the model - self.schedule.step() + self.agents.shuffle_do("step") # Collect data for visualization self.datacollector.collect(self) # Compute the new Gini coefficient - new_gini = compute_gini(self) + new_gini = self.compute_gini() # Compute the reward based on the change in Gini coefficient reward = self.calculate_reward(new_gini) self.prev_gini = new_gini # Get the observation for the RL model obs = self._get_obs() - if self.schedule.time > 5 * NUM_AGENTS: + if self.time > 5 * NUM_AGENTS: # Terminate the episode if the model has run for a certain number of timesteps done = True reward = -1 elif new_gini < 0.1: # Terminate the episode if the Gini coefficient is below a certain threshold done = True - reward = 50 / self.schedule.time + reward = 50 / self.time else: done = False info = {} @@ -142,20 +140,18 @@ def reset(self, *, seed=None, options=None): self.visualize() super().reset() self.grid = mesa.space.MultiGrid(self.grid.width, self.grid.height, True) - self.schedule = mesa.time.RandomActivation(self) + self.remove_all_agents() for i in range(self.num_agents): # Create MoneyAgentRL instances and add them to the schedule - a = MoneyAgentRL(i, self) - self.schedule.add(a) + a = MoneyAgentRL(self) x = self.random.randrange(self.grid.width) y = self.random.randrange(self.grid.height) self.grid.place_agent(a, (x, y)) - self.prev_gini = compute_gini(self) + self.prev_gini = self.compute_gini() return self._get_obs(), {} def _get_obs(self): # The observation is the wealth of each agent and their position - obs = [] - for a in self.schedule.agents: - obs.append([a.wealth, *list(a.pos)]) + obs = [[a.wealth, *a.pos] for a in self.agents] + obs = np.array(obs) return np.array(obs)
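
Note on the unique_id workaround in PATCH 3/3: the commit message ("creating duplicate agents for some reason; need to reset the unique_id iterator") and the `# TODO: figure out why agents are being made twice` comment both point at the same issue — in Mesa 3 the `unique_id` counter is assigned automatically and keeps running across `reset()`, so the re-created agents no longer start at id 1 and the step code compensates with the hard-coded `self.model.action_dict[self.unique_id - 11]`. Below is a minimal, hedged sketch of one alternative: keep an explicit agent-to-action-slot mapping that is rebuilt on every reset, so no assumption about where the id counter stands is needed. The `agent_index` attribute and `_rebuild_agent_index` helper are hypothetical names introduced for illustration (not part of the patch); the fragment assumes the classes, imports, and methods (`move`, `take_money`, `_get_obs`) defined in the patched `rl/boltzmann_money/model.py`.

```python
# Sketch only: explicit agent -> action-slot mapping instead of the
# `self.unique_id - 11` offset. Fragment of the patched rl/boltzmann_money/model.py;
# `agent_index` / `_rebuild_agent_index` are illustrative, not from the patch.
import gymnasium
from mesa.examples.basic.boltzmann_wealth_model.agents import MoneyAgent
from mesa.examples.basic.boltzmann_wealth_model.model import BoltzmannWealth


class MoneyAgentRL(MoneyAgent):
    def step(self):
        # Look up this agent's slot in the flat MultiDiscrete action vector
        # instead of assuming its unique_id starts at a fixed offset after reset().
        action = self.model.action_dict[self.model.agent_index[self.unique_id]]
        self.move(action)
        self.take_money()


class BoltzmannWealthModelRL(BoltzmannWealth, gymnasium.Env):
    def _rebuild_agent_index(self):
        # Map each live agent's unique_id to a stable 0..n-1 index, regardless
        # of where Mesa 3's automatic id counter currently stands. Iterating
        # self.agents here matches the order used by _get_obs() in the patch,
        # so observations and actions stay aligned.
        self.agent_index = {a.unique_id: i for i, a in enumerate(self.agents)}

    def reset(self, *, seed=None, options=None):
        # ... existing agent re-creation from the patched reset() goes here ...
        self._rebuild_agent_index()
        return self._get_obs(), {}
```

The same idea would also remove the need to "reset the unique_id iterator" mentioned in the commit message, since nothing downstream depends on the ids being contiguous or starting at a particular value.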