From 80e10c79fd9fb48ea1e8bb3f6260d536720a1ab6 Mon Sep 17 00:00:00 2001 From: Adam Amer <136176500+adamamer20@users.noreply.github.com> Date: Mon, 1 Jul 2024 22:22:03 +0200 Subject: [PATCH 01/15] Modified AgentSet and AgentsDF before adding mesa_frames/time.py. Created a CopyMixin for copy and deepcopy methods to let both AgentSet and Scheduler inherit. Added possibility of using 'do' method with a mask in AgentSetDF, such that the Scheduler can use it to run the method only on a subset of agents. Minor fixes to type hints and docstrings. --- mesa_frames/abstract/agents.py | 253 ++++++++++++------------ mesa_frames/abstract/mixin.py | 135 +++++++++++++ mesa_frames/concrete/agents.py | 173 +++++++++++----- mesa_frames/concrete/agentset_pandas.py | 106 ++++++++-- mesa_frames/concrete/agentset_polars.py | 136 ++++++++++--- mesa_frames/types.py | 2 + tests/test_agentset_pandas.py | 9 +- tests/test_agentset_polars.py | 9 + 8 files changed, 592 insertions(+), 231 deletions(-) create mode 100644 mesa_frames/abstract/mixin.py diff --git a/mesa_frames/abstract/agents.py b/mesa_frames/abstract/agents.py index 2e01b86..4589a03 100644 --- a/mesa_frames/abstract/agents.py +++ b/mesa_frames/abstract/agents.py @@ -1,14 +1,13 @@ from __future__ import annotations # PEP 563: postponed evaluation of type annotations -from abc import ABC, abstractmethod +from abc import abstractmethod from contextlib import suppress -from copy import copy, deepcopy from typing import ( TYPE_CHECKING, Any, Callable, Collection, - Hashable, + Iterable, Iterator, Literal, Self, @@ -18,21 +17,19 @@ from numpy.random import Generator -from mesa_frames.types import BoolSeries, DataFrame, IdsLike, MaskLike, Series +from mesa_frames.abstract.mixin import CopyMixin +from mesa_frames.types import BoolSeries, DataFrame, IdsLike, Index, MaskLike, Series if TYPE_CHECKING: + from mesa_frames.concrete.agents import AgentSetDF from mesa_frames.concrete.model import ModelDF -class AgentContainer(ABC): +class AgentContainer(CopyMixin): """An abstract class for containing agents. Defines the common interface for AgentSetDF and AgentsDF. Attributes ---------- - _copy_with_method : dict[str, tuple[str, list[str]]] - A dictionary of attributes to copy with a specified method and arguments. - _copy_only_reference : list[str] - A list of attributes to copy with a reference only. _model : ModelDF The model that the AgentContainer belongs to. @@ -75,7 +72,6 @@ class AgentContainer(ABC): Get the inactive agents in the AgentContainer. """ - _copy_with_method: dict[str, tuple[str, list[str]]] _copy_only_reference: list[str] = [ "_model", ] @@ -84,73 +80,15 @@ class AgentContainer(ABC): @abstractmethod def __init__(self) -> None: ... - def copy( - self, - deep: bool = False, - memo: dict | None = None, - ) -> Self: - """Create a copy of the AgentContainer. + def discard(self, agents, inplace: bool = True) -> Self: + """Removes agents from the AgentContainer. Does not raise an error if the agent is not found. Parameters ---------- - deep : bool, optional - Flag indicating whether to perform a deep copy of the AgentContainer. - If True, all attributes of the AgentContainer will be recursively copied (except attributes in self._copy_reference_only). - If False, only the top-level attributes will be copied. - Defaults to False. - - memo : dict | None, optional - A dictionary used to track already copied objects during deep copy. - Defaults to None. - - Returns - ------- - Self - A new instance of the AgentContainer class that is a copy of the original instance. - """ - cls = self.__class__ - obj = cls.__new__(cls) - - if deep: - if not memo: - memo = {} - memo[id(self)] = obj - attributes = self.__dict__.copy() - [ - setattr(obj, k, deepcopy(v, memo)) - for k, v in attributes.items() - if k not in self._copy_with_method - and k not in self._copy_only_reference - ] - else: - [ - setattr(obj, k, copy(v)) - for k, v in self.__dict__.items() - if k not in self._copy_with_method - and k not in self._copy_only_reference - ] - - # Copy attributes with a reference only - for attr in self._copy_only_reference: - setattr(obj, attr, getattr(self, attr)) - - # Copy attributes with a specified method - for attr in self._copy_with_method: - attr_obj = getattr(self, attr) - attr_copy_method, attr_copy_args = self._copy_with_method[attr] - setattr(obj, attr, getattr(attr_obj, attr_copy_method)(*attr_copy_args)) - - return obj - - def discard(self, agents: "AgentSetDF" | IdsLike, inplace: bool = True) -> Self: - """Removes an agent from the AgentContainer. Does not raise an error if the agent is not found. - - Parameters - ---------- - ids : MaskLike - The MaskLike of the agents to remove. + agents + The agents to remove inplace : bool - Whether to remove the agent in place. Defaults to False. + Whether to remove the agent in place. Defaults to True. Returns ---------- @@ -161,12 +99,12 @@ def discard(self, agents: "AgentSetDF" | IdsLike, inplace: bool = True) -> Self: return self._get_obj(inplace) @abstractmethod - def add(self, other, inplace: bool = True) -> Self: + def add(self, agents, inplace: bool = True) -> Self: """Add agents to the AgentContainer. Parameters ---------- - other : Any + agents The agents to add. inplace : bool Whether to add the agents in place. Defaults to True. @@ -207,6 +145,7 @@ def do( self, method_name: str, *args, + mask: MaskLike | None = None, return_results: Literal[False] = False, inplace: bool = True, **kwargs, @@ -218,26 +157,32 @@ def do( self, method_name: str, *args, + mask: MaskLike | None = None, return_results: Literal[True], inplace: bool = True, **kwargs, - ) -> Any | dict[str, Any]: ... + ) -> Any | dict["AgentSetDF", Any]: ... @abstractmethod def do( self, method_name: str, *args, + mask: MaskLike | None = None, return_results: bool = False, inplace: bool = True, **kwargs, - ) -> Self | Any | dict[str, Any]: + ) -> Self | Any | dict["AgentSetDF", Any]: """Invoke a method on the AgentContainer. Parameters ---------- method_name : str The name of the method to invoke. + *args : Any + Positional arguments to pass to the method + mask : MaskLike, optional + The subset of agents on which to apply the method return_results : bool, optional Whether to return the result of the method, by default False inplace : bool, optional @@ -281,13 +226,13 @@ def get( ... @abstractmethod - def remove(self, agents: IdsLike, inplace: bool = True) -> Self: - """Removes an agent from the AgentContainer. + def remove(self, agents, inplace: bool = True) -> Self: + """Removes the agents from the AgentContainer Parameters ---------- - agents : MaskLike - The ID of the agent to remove. + agents + The agents to remove. inplace : bool Whether to remove the agent in place. @@ -423,26 +368,8 @@ def sort( A new or updated AgentContainer. """ - def _get_obj(self, inplace: bool) -> Self: - """Get the object to perform operations on. - - Parameters - ---------- - inplace : bool - If inplace, return self. Otherwise, return a copy. - - Returns - ---------- - Self - The object to perform operations on. - """ - if inplace: - return self - else: - return deepcopy(self) - def __add__(self, other) -> Self: - return self.add(other=other, inplace=False) + return self.add(agents=other, inplace=False) def __contains__(self, id: int) -> bool: """Check if an agent is in the AgentContainer. @@ -459,32 +386,7 @@ def __contains__(self, id: int) -> bool: """ if not isinstance(id, int): raise TypeError("id must be an integer") - return self.contains(agents=id) - - def __copy__(self) -> Self: - """Create a shallow copy of the AgentContainer. - - Returns - ------- - Self - A shallow copy of the AgentContainer. - """ - return self.copy(deep=False) - - def __deepcopy__(self, memo: dict) -> Self: - """Create a deep copy of the AgentContainer. - - Parameters - ---------- - memo : dict - A dictionary to store the copied objects. - - Returns - ------- - Self - A deep copy of the AgentContainer. - """ - return self.copy(deep=True, memo=memo) + return self.contains(ids=id) def __getitem__( self, @@ -535,7 +437,7 @@ def __iadd__(self, other) -> Self: Self The updated AgentContainer. """ - return self.add(other=other, inplace=True) + return self.add(agents=other, inplace=True) def __isub__(self, other: "AgentSetDF" | IdsLike) -> Self: """Remove agents from the AgentContainer through the -= operator. @@ -844,7 +746,7 @@ def __init__(self, model: ModelDF) -> None: @abstractmethod def add( - self, other: DataFrame | Sequence[Any] | dict[str, Any], inplace: bool = True + self, agents: DataFrame | Sequence[Any] | dict[str, Any], inplace: bool = True ) -> Self: """Add agents to the AgentSetDF @@ -867,11 +769,15 @@ def add( """ ... + def discard(self, agents: IdsLike, inplace: bool = True) -> Self: + return super().discard(agents, inplace) + @overload def do( self, method_name: str, *args, + mask: MaskLike | None = None, return_results: Literal[False] = False, inplace: bool = True, **kwargs, @@ -882,6 +788,7 @@ def do( self, method_name: str, *args, + mask: MaskLike | None = None, return_results: Literal[True], inplace: bool = True, **kwargs, @@ -891,16 +798,34 @@ def do( self, method_name: str, *args, + mask: MaskLike | None = None, return_results: bool = False, inplace: bool = True, **kwargs, ) -> Self | Any: - obj = self._get_obj(inplace) - method = getattr(obj, method_name) + masked_df = self._get_masked_df(mask) + # If the mask is empty, we can use the object as is + if len(masked_df) == len(self._agents): + obj = self._get_obj(inplace) + method = getattr(obj, method_name) + result = method(*args, **kwargs) + else: # If the mask is not empty, we need to create a new masked AgentSetDF and concatenate the AgentSetDFs at the end + obj = self._get_obj(inplace=False) + obj._agents = masked_df + original_masked_index = obj._get_obj_copy(obj.index) + method = getattr(obj, method_name) + result = method(*args, **kwargs) + obj = obj._concatenate_agentsets( + [self], + duplicates_allowed=True, + keep_first_only=True, + original_masked_index=original_masked_index, + ) + self._agents = obj._agents + self._mask = obj._mask if return_results: - return method(*args, **kwargs) + return result else: - method(*args, **kwargs) return obj @abstractmethod @@ -926,6 +851,67 @@ def get( mask: MaskLike | None = None, ) -> Series | DataFrame: ... + @abstractmethod + def remove(self, agents: IdsLike, inplace: bool = True) -> Self: ... + + @abstractmethod + def _concatenate_agentsets( + self, + objs: Iterable[Self], + duplicates_allowed: bool = True, + keep_first_only: bool = True, + original_masked_index: Index | None = None, + ) -> Self: ... + + @abstractmethod + def _get_bool_mask(self, mask: MaskLike) -> BoolSeries: + """Get the equivalent boolean mask based on the input mask + + Parameters + ---------- + mask : MaskLike + + Returns + ------- + BoolSeries + """ + ... + + @abstractmethod + def _get_masked_df(self, mask: MaskLike) -> DataFrame: + """Get the df filtered by the input mask + + Parameters + ---------- + mask : MaskLike + + Returns + ------- + DataFrame + """ + + @overload + @abstractmethod + def _get_obj_copy(self, obj: DataFrame) -> DataFrame: ... + + @overload + @abstractmethod + def _get_obj_copy(self, obj: Series) -> Series: ... + + @overload + @abstractmethod + def _get_obj_copy(self, obj: Index) -> Index: ... + + @abstractmethod + def _get_obj_copy( + self, obj: DataFrame | Series | Index + ) -> DataFrame | Series | Index: ... + + @abstractmethod + def _update_mask( + self, original_active_indices: Index, new_active_indices: Index | None = None + ) -> None: ... + def __add__(self, other: DataFrame | Sequence[Any] | dict[str, Any]) -> Self: """Add agents to a new AgentSetDF through the + operator. @@ -1033,3 +1019,6 @@ def active_agents(self) -> DataFrame: ... @property @abstractmethod def inactive_agents(self) -> DataFrame: ... + + @property + def index(self) -> Index: ... diff --git a/mesa_frames/abstract/mixin.py b/mesa_frames/abstract/mixin.py new file mode 100644 index 0000000..b027546 --- /dev/null +++ b/mesa_frames/abstract/mixin.py @@ -0,0 +1,135 @@ +from abc import ABC, abstractmethod +from copy import copy, deepcopy +from typing import Self + + +class CopyMixin(ABC): + """A mixin class that provides a fast copy method for the class that inherits it. + + Attributes + ---------- + _copy_with_method : dict[str, tuple[str, list[str]]] + A dictionary that maps the attribute name to a tuple containing the method name and the arguments to be passed to the method. This is used to copy attributes that use a specific method to be called for copying (eg pd.DataFrame.copy() method). + _copy_only_reference : list[str] + A list of attribute names that should only be copied by reference. + + Methods + ------- + copy(deep: bool = False, memo: dict | None = None) -> Self + Create a copy of the object. If deep is True, a deep copy is created. If deep is False, a shallow copy is created. + + + Returns + ------- + _type_ + _description_ + """ + + _copy_with_method: dict[str, tuple[str, list[str]]] + _copy_only_reference: list[str] = [ + "_model", + ] + + @abstractmethod + def __init__(self): ... + + def copy( + self, + deep: bool = False, + memo: dict | None = None, + ) -> Self: + """Create a copy of the Class. + + Parameters + ---------- + deep : bool, optional + Flag indicating whether to perform a deep copy of the AgentContainer. + If True, all attributes of the AgentContainer will be recursively copied (except attributes in self._copy_reference_only). + If False, only the top-level attributes will be copied. + Defaults to False. + + memo : dict | None, optional + A dictionary used to track already copied objects during deep copy. + Defaults to None. + + Returns + ------- + Self + A new instance of the AgentContainer class that is a copy of the original instance. + """ + cls = self.__class__ + obj = cls.__new__(cls) + + if deep: + if not memo: + memo = {} + memo[id(self)] = obj + attributes = self.__dict__.copy() + [ + setattr(obj, k, deepcopy(v, memo)) + for k, v in attributes.items() + if k not in self._copy_with_method + and k not in self._copy_only_reference + ] + else: + [ + setattr(obj, k, copy(v)) + for k, v in self.__dict__.items() + if k not in self._copy_with_method + and k not in self._copy_only_reference + ] + + # Copy attributes with a reference only + for attr in self._copy_only_reference: + setattr(obj, attr, getattr(self, attr)) + + # Copy attributes with a specified method + for attr in self._copy_with_method: + attr_obj = getattr(self, attr) + attr_copy_method, attr_copy_args = self._copy_with_method[attr] + setattr(obj, attr, getattr(attr_obj, attr_copy_method)(*attr_copy_args)) + + return obj + + def _get_obj(self, inplace: bool) -> Self: + """Get the object to perform operations on. + + Parameters + ---------- + inplace : bool + If inplace, return self. Otherwise, return a copy. + + Returns + ---------- + Self + The object to perform operations on. + """ + if inplace: + return self + else: + return deepcopy(self) + + def __copy__(self) -> Self: + """Create a shallow copy of the AgentContainer. + + Returns + ------- + Self + A shallow copy of the AgentContainer. + """ + return self.copy(deep=False) + + def __deepcopy__(self, memo: dict) -> Self: + """Create a deep copy of the AgentContainer. + + Parameters + ---------- + memo : dict + A dictionary to store the copied objects. + + Returns + ------- + Self + A deep copy of the AgentContainer. + """ + return self.copy(deep=True, memo=memo) diff --git a/mesa_frames/concrete/agents.py b/mesa_frames/concrete/agents.py index 0729980..ed77f00 100644 --- a/mesa_frames/concrete/agents.py +++ b/mesa_frames/concrete/agents.py @@ -1,13 +1,20 @@ -from operator import ne -from typing import Any, Callable, Iterable, Iterator, Literal, Self, Sequence, overload +from collections import defaultdict +from typing import ( + Any, + Callable, + Iterable, + Iterator, + Literal, + Self, + Sequence, + cast, + overload, +) import polars as pl -from mesa import Agent -from mesa_frames.abstract.agents import AgentContainer, AgentSetDF, Collection, Hashable -from mesa_frames.concrete.agentset_pandas import AgentSetPandas -from mesa_frames.concrete.agentset_polars import AgentSetPolars -from mesa_frames.types import BoolSeries, DataFrame, IdsLike, MaskLike, Series +from mesa_frames.abstract.agents import AgentContainer, AgentSetDF, Collection +from mesa_frames.types import DataFrame, IdsLike, MaskLike, Series class AgentsDF(AgentContainer): @@ -92,13 +99,13 @@ def __init__(self) -> None: self._ids = pl.Series(name="unique_id", dtype=pl.Int64) def add( - self, other: AgentSetDF | Iterable[AgentSetDF], inplace: bool = True + self, agentsets: AgentSetDF | Iterable[AgentSetDF], inplace: bool = True ) -> Self: """Add an AgentSetDF to the AgentsDF. Parameters ---------- - other : AgentSetDF + agentsets : AgentSetDF | Iterable[AgentSetDF] The AgentSetDF to add. inplace : bool Whether to add the AgentSetDF in place. @@ -107,29 +114,43 @@ def add( ---------- Self The updated AgentsDF. + + Raises + ------ + ValueError + If some agentsets are already present in the AgentsDF or if the IDs are not unique. """ obj = self._get_obj(inplace) - self._check_ids(other) - if isinstance(other, AgentSetDF): - obj._agentsets.append(other) - elif isinstance(other, Iterable): - if not all(isinstance(agentset, AgentSetDF) for agentset in other): - raise TypeError("All elements in the iterable must be AgentSetDFs.") - obj._agentsets.extend(other) - return self + other_list = obj._return_agentsets_list(agentsets) + if obj._check_agentsets_presence(other_list).any(): + raise ValueError("Some agentsets are already present in the AgentsDF.") + new_ids = pl.concat( + [obj._ids] + [pl.Series(agentset["unique_id"]) for agentset in other_list] + ) + if new_ids.is_duplicated().any(): + raise ValueError("Some of the agent IDs are not unique.") + obj._agentsets.extend(other_list) + obj._ids = new_ids + return obj @overload - def contains(self, agents: AgentSetDF | int) -> bool: ... + def contains(self, agents: int | AgentSetDF) -> bool: ... @overload - def contains(self, agents: IdsLike) -> pl.Series: ... + def contains(self, agents: IdsLike | Iterable[AgentSetDF]) -> pl.Series: ... - def contains(self, agents: AgentSetDF | IdsLike) -> bool | pl.Series: - if isinstance(agents, int): - return agents in self._ids - elif isinstance(agents, AgentSetDF): - return agents in self._agentsets + def contains( + self, agents: AgentSetDF | IdsLike | Iterable[AgentSetDF] + ) -> bool | pl.Series: + if isinstance(agents, AgentSetDF): + return self._check_agentsets_presence([agents]).any() + elif isinstance(agents, Iterable) and isinstance( + next(iter(agents)), AgentSetDF + ): + agents = cast(Iterable[AgentSetDF], agents) + return self._check_agentsets_presence(list(agents)) else: + agents = cast(IdsLike, agents) return pl.Series(agents).is_in(self._ids) @overload @@ -194,21 +215,27 @@ def get( agentset: agentset.get(attr_names, mask) for agentset in self._agentsets } - def remove(self, agents: AgentSetDF | IdsLike, inplace: bool = True) -> Self: + def remove( + self, agents: AgentSetDF | Iterable[AgentSetDF] | IdsLike, inplace: bool = True + ) -> Self: obj = self._get_obj(inplace) deleted = 0 if isinstance(agents, AgentSetDF): - try: - obj._agentsets.remove(agents) - except ValueError: - raise KeyError(f"{agents} not found in the AgentsDF.") - else: # elif isinstance(ids, IdsLike): - for agents in obj._agentsets: - initial_len = len(agents) - agents.discard(agents, inplace=True) - deleted += initial_len - len(agents) - if deleted < len(list(agents)): # TODO: fix type hint - raise KeyError(f"Some ids were not found in the AgentsDF.") + self._agentsets.remove(agents) + elif isinstance(agents, Iterable) and isinstance( + next(iter(agents)), AgentSetDF + ): # Faster than controlling every AgentSetDF + for agentset in iter(agents): + self._agentsets.remove(agentset) # type: ignore (Pylance can't recognize agents as Iterable[AgentSetDF]) + else: # IDsLike + for agentset in obj._agentsets: + initial_len = len(agentset) + agentset.discard(agents, inplace=True) + deleted += initial_len - len(agentset) + if deleted < len(list(agents)): # TODO: fix type hint + raise KeyError( + f"There exist some IDs which are not present in any agentset" + ) return obj def set( @@ -263,12 +290,12 @@ def sort( ] return obj - def _check_ids(self, other: AgentSetDF | Iterable[AgentSetDF]) -> None: + def _check_ids_presence(self, other: list[AgentSetDF]) -> pl.DataFrame: """Check if the IDs of the agents to be added are unique. Parameters ---------- - other : AgentSetDF | Iterable[AgentSetDF] + other : list[AgentSetDF] The AgentSetDFs to check. Raises @@ -276,19 +303,56 @@ def _check_ids(self, other: AgentSetDF | Iterable[AgentSetDF]) -> None: ValueError If the agent set contains IDs already present in agents. """ - for agentset in [other] if isinstance(other, AgentSetDF) else other: - if isinstance(agentset, AgentSetPandas): - new_ids = pl.from_pandas(agentset._agents.index) - elif isinstance(agentset, AgentSetPolars): - new_ids = agentset._agents["unique_id"] - else: - raise TypeError( - "AgentSetDF must be of type AgentSetPandas or AgentSetPolars." - ) - if new_ids.is_in(self._ids).any(): - raise ValueError( - "The agent set contains IDs already present in agents." - ) + presence_df = pl.DataFrame( + data={"unique_id": self._ids}, + schema={"unique_id": pl.Int64, "present": pl.Boolean}, + ) + for agentset in other: + new_ids = pl.Series(agentset["unique_id"]) + presence_df = pl.concat( + [ + presence_df, + ( + new_ids.is_in(presence_df["unique_id"]) + .to_frame() + .with_columns("unique_id", new_ids) + ), + ] + ) + return presence_df + + def _check_agentsets_presence(self, other: list[AgentSetDF]) -> pl.Series: + """Check if the agent sets to be added are already present in the AgentsDF. + + Parameters + ---------- + other : list[AgentSetDF] + The AgentSetDFs to check. + + Raises + ------ + ValueError + If the agent sets are already present in the AgentsDF. + """ + other_set = set(other) + return pl.Series( + [agentset in other_set for agentset in self._agentsets], dtype=pl.Boolean + ) + + def _return_agentsets_list( + self, agentsets: AgentSetDF | Iterable[AgentSetDF] + ) -> list[AgentSetDF]: + """Convert the agentsets to a list of AgentSetDF + + Parameters + ---------- + agentsets : AgentSetDF | Iterable[AgentSetDF] + + Returns + ------- + list[AgentSetDF] + """ + return [agentsets] if isinstance(agentsets, AgentSetDF) else list(agentsets) def __add__(self, other: AgentSetDF | Iterable[AgentSetDF]) -> Self: """Add AgentSetDFs to a new AgentsDF through the + operator. @@ -363,6 +427,13 @@ def agents(self, other: Iterable[AgentSetDF]) -> None: def active_agents(self) -> dict[AgentSetDF, DataFrame]: return {agentset: agentset.active_agents for agentset in self._agentsets} + @property + def agentsets_by_type(self) -> dict[type[AgentSetDF], list[AgentSetDF]]: + dictionary = defaultdict(list) + for agentset in self._agentsets: + dictionary[agentset.__class__] = dictionary[agentset.__class__] + [agentset] + return dictionary + @property def inactive_agents(self): return {agentset: agentset.inactive_agents for agentset in self._agentsets} diff --git a/mesa_frames/concrete/agentset_pandas.py b/mesa_frames/concrete/agentset_pandas.py index 6f89b01..fd5f24b 100644 --- a/mesa_frames/concrete/agentset_pandas.py +++ b/mesa_frames/concrete/agentset_pandas.py @@ -3,6 +3,7 @@ Any, Callable, Collection, + Iterable, Iterator, Self, Sequence, @@ -11,9 +12,11 @@ import pandas as pd import polars as pl +from networkx import intersection from mesa_frames.abstract.agents import AgentSetDF from mesa_frames.concrete.agentset_polars import AgentSetPolars +from mesa_frames.concrete.model import ModelDF from mesa_frames.types import PandasIdsLike, PandasMaskLike if TYPE_CHECKING: @@ -115,38 +118,46 @@ def __init__(self, model: "ModelDF") -> None: def add( self, - other: pd.DataFrame | Sequence[Any] | dict[str, Any], + agents: pd.DataFrame | Sequence[Any] | dict[str, Any], inplace: bool = True, ) -> Self: obj = self._get_obj(inplace) - if isinstance(other, pd.DataFrame): - new_agents = other - if "unique_id" != other.index.name: + if isinstance(agents, pd.DataFrame): + new_agents = agents + if "unique_id" != agents.index.name: try: new_agents.set_index("unique_id", inplace=True, drop=True) except KeyError: raise KeyError("DataFrame must have a unique_id column/index.") - elif isinstance(other, dict): - if "unique_id" not in other: + elif isinstance(agents, dict): + if "unique_id" not in agents: raise KeyError("Dictionary must have a unique_id key.") - index = other.pop("unique_id") + index = agents.pop("unique_id") if not isinstance(index, list): index = [index] - new_agents = pd.DataFrame(other, index=pd.Index(index, name="unique_id")) + new_agents = pd.DataFrame(agents, index=pd.Index(index, name="unique_id")) else: - if len(other) != len(obj._agents.columns) + 1: + if len(agents) != len(obj._agents.columns) + 1: raise ValueError( "Length of data must match the number of columns in the AgentSet if being added as a Collection." ) columns = pd.Index(["unique_id"]).append(obj._agents.columns.copy()) - new_agents = pd.DataFrame([other], columns=columns).set_index( + new_agents = pd.DataFrame([agents], columns=columns).set_index( "unique_id", drop=True ) if new_agents.index.dtype != "int64": raise TypeError("unique_id must be of type int64.") + if not obj._agents.index.intersection(new_agents.index).empty: + raise KeyError("Some IDs already exist in the agent set.") + + original_active_indices = obj._mask.index[obj._mask].copy() + obj._agents = pd.concat([obj._agents, new_agents]) + + obj._update_mask(original_active_indices, new_agents.index) + return obj @overload @@ -155,10 +166,7 @@ def contains(self, ids: int) -> bool: ... @overload def contains(self, ids: PandasIdsLike) -> pd.Series: ... - def contains( - self, - ids: PandasIdsLike, - ) -> bool | pd.Series: + def contains(self, ids: PandasIdsLike) -> bool | pd.Series: if isinstance(ids, pd.Series): return ids.isin(self._agents.index) elif isinstance(ids, pd.Index): @@ -174,11 +182,13 @@ def get( self, attr_names: str | Collection[str] | None = None, mask: PandasMaskLike = None, - ) -> pd.Series | pd.DataFrame: + ) -> pd.Index | pd.Series | pd.DataFrame: mask = self._get_bool_mask(mask) if attr_names is None: return self._agents.loc[mask] else: + if attr_names == "unique_id": + return self._agents.loc[mask].index if isinstance(attr_names, str): return self._agents.loc[mask, attr_names] if isinstance(attr_names, Collection): @@ -193,9 +203,12 @@ def remove( initial_len = len(obj._agents) mask = obj._get_bool_mask(ids) remove_ids = obj._agents[mask].index + original_active_indices = obj._mask.index[obj._mask].copy() obj._agents.drop(remove_ids, inplace=True) if len(obj._agents) == initial_len: - raise KeyError(f"IDs {ids} not found in agent set.") + raise KeyError(f"Some IDs were not found in agent set.") + + self._update_mask(original_active_indices) return obj def set( @@ -281,6 +294,36 @@ def to_polars(self) -> AgentSetPolars: new_obj._mask = pl.Series(self._mask) return new_obj + def _concatenate_agentsets( + self, + agentsets: Iterable[Self], + duplicates_allowed: bool = True, + keep_first_only: bool = True, + original_masked_index: pd.Index | None = None, + ) -> Self: + if not duplicates_allowed: + indices = [self._agents.index.to_series()] + [ + agentset._agents.index.to_series() for agentset in agentsets + ] + pd.concat(indices, verify_integrity=True) + if duplicates_allowed & keep_first_only: + final_df = self._agents.copy() + final_mask = self._mask.copy() + for obj in iter(agentsets): + final_df = final_df.combine_first(obj._agents) + final_mask = final_mask.combine_first(obj._mask) + else: + final_df = pd.concat([obj._agents for obj in agentsets]) + final_mask = pd.concat([obj._mask for obj in agentsets]) + new_obj = self._get_obj(inplace=False) + new_obj._agents = final_df + new_obj._mask = final_mask + if not isinstance(original_masked_index, type(None)): + ids_to_remove = original_masked_index.difference(self._agents.index) + if not ids_to_remove.empty: + new_obj.remove(ids_to_remove, inplace=True) + return new_obj + def _get_bool_mask( self, mask: PandasMaskLike = None, @@ -339,6 +382,33 @@ def _get_masked_df( mask_df = mask_series.to_frame("unique_id").set_index("unique_id") return mask_df.join(self._agents, on="unique_id", how="left") + @overload + def _get_obj_copy(self, obj: pd.Series) -> pd.Series: ... + + @overload + def _get_obj_copy(self, obj: pd.DataFrame) -> pd.DataFrame: ... + + @overload + def _get_obj_copy(self, obj: pd.Index) -> pd.Index: ... + + def _get_obj_copy( + self, obj: pd.Series | pd.DataFrame | pd.Index + ) -> pd.Series | pd.DataFrame | pd.Index: + return obj.copy() + + def _update_mask( + self, + original_active_indices: pd.Index, + new_active_indices: pd.Index | None = None, + ) -> None: + # Update the mask with the old active agents and the new agents + self._mask = pd.Series( + self._agents.index.isin(original_active_indices) + | self._agents.index.isin(new_active_indices), + index=self._agents.index, + dtype=pd.BooleanDtype(), + ) + def __getattr__(self, name: str) -> Any: super().__getattr__(name) return getattr(self._agents, name) @@ -377,3 +447,7 @@ def active_agents(self, mask: PandasMaskLike) -> None: @property def inactive_agents(self) -> pd.DataFrame: return self._agents.loc[~self._mask] + + @property + def index(self) -> pd.Index: + return self._agents.index diff --git a/mesa_frames/concrete/agentset_polars.py b/mesa_frames/concrete/agentset_polars.py index 13cd4e7..197c69a 100644 --- a/mesa_frames/concrete/agentset_polars.py +++ b/mesa_frames/concrete/agentset_polars.py @@ -3,10 +3,8 @@ Any, Callable, Collection, - Hashable, Iterable, Iterator, - Literal, Self, Sequence, overload, @@ -124,11 +122,11 @@ def __init__(self, model: "ModelDF") -> None: """ self._model = model self._agents = pl.DataFrame(schema={"unique_id": pl.Int64}) - self._mask = pl.repeat(True, len(self._agents)) + self._mask = pl.repeat(True, len(self._agents), dtype=pl.Boolean, eager=True) def add( self, - other: pl.DataFrame | Sequence[Any] | dict[str, Any], + agents: pl.DataFrame | Sequence[Any] | dict[str, Any], inplace: bool = True, ) -> Self: """Add agents to the AgentSetPolars. @@ -146,25 +144,35 @@ def add( The updated AgentSetPolars. """ obj = self._get_obj(inplace) - if isinstance(other, pl.DataFrame): - if "unique_id" not in other.columns: + if isinstance(agents, pl.DataFrame): + if "unique_id" not in agents.columns: raise KeyError("DataFrame must have a unique_id column.") - new_agents = other - elif isinstance(other, dict): - if "unique_id" not in other: + new_agents = agents + elif isinstance(agents, dict): + if "unique_id" not in agents: raise KeyError("Dictionary must have a unique_id key.") - new_agents = pl.DataFrame(other) + new_agents = pl.DataFrame(agents) else: - if len(other) != len(obj._agents.columns): + if len(agents) != len(obj._agents.columns): raise ValueError( "Length of data must match the number of columns in the AgentSet if being added as a Collection." ) - new_agents = pl.DataFrame([other], schema=obj._agents.schema) + new_agents = pl.DataFrame([agents], schema=obj._agents.schema) if new_agents["unique_id"].dtype != pl.Int64: raise TypeError("unique_id column must be of type int64.") + # If self._mask is pl.Expr, then new mask is the same. + # If self._mask is pl.Series[bool], then new mask has to be updated. + + if isinstance(obj._mask, pl.Series): + original_active_indices = obj._agents.filter(obj._mask)["unique_id"] + obj._agents = pl.concat([obj._agents, new_agents], how="diagonal_relaxed") + + if isinstance(obj._mask, pl.Series): + obj._update_mask(original_active_indices, new_agents["unique_id"]) + return obj @overload @@ -184,23 +192,6 @@ def contains( else: return ids in self._agents["unique_id"] - def discard(self, ids: PolarsIdsLike, inplace: bool = True) -> Self: - """Remove an agent from the AgentSetPolars. Does not raise an error if the agent is not found. - - Parameters - ---------- - ids : PolarsMaskLike - The mask of agents to remove. - inplace : bool, optional - Whether to remove the agents in place, by default True. - - Returns - ------- - Self - The updated AgentSetPolars. - """ - return super().discard(ids=ids, inplace=inplace) - def get( self, attr_names: IntoExpr | Iterable[IntoExpr] | None, @@ -215,13 +206,20 @@ def get( return masked_df[masked_df.columns[0]] return masked_df - def remove(self, ids: PolarsMaskLike, inplace: bool = True) -> Self: + def remove(self, ids: PolarsIdsLike, inplace: bool = True) -> Self: obj = self._get_obj(inplace=inplace) initial_len = len(obj._agents) mask = obj._get_bool_mask(ids) + + if isinstance(obj._mask, pl.Series): + original_active_indices = obj._agents.filter(obj._mask)["unique_id"] + obj._agents = obj._agents.filter(mask.not_()) if len(obj._agents) == initial_len: raise KeyError(f"IDs {ids} not found in agent set.") + + if isinstance(obj.mask, pl.Series): + obj._update_mask(original_active_indices) return obj def set( @@ -334,6 +332,59 @@ def to_pandas(self) -> "AgentSetPandas": ) return new_obj + def _concatenate_agentsets( + self, + agentsets: Iterable[Self], + duplicates_allowed: bool = True, + keep_first_only: bool = True, + original_masked_index: pl.Series | None = None, + ) -> Self: + if not duplicates_allowed: + indices_list = [self._agents["unique_id"]] + [ + agentset._agents["unique_id"] for agentset in agentsets + ] + all_indices = pl.concat(indices_list) + if all_indices.is_duplicated().any(): + raise ValueError( + "Some ids are duplicated in the AgentSetDFs that are trying to be concatenated" + ) + if duplicates_allowed & keep_first_only: + final_dfs = [self._agents] + final_active_indices = [self._agents["unique_id"]] + final_indices = self._agents["unique_id"].clone() + for obj in iter(agentsets): + # Remove agents that are already in the final DataFrame + final_dfs.append( + obj._agents.filter(pl.col("unique_id").is_in(final_indices).not_()) + ) + # Add the indices of the active agents of current AgentSet + final_active_indices.append(obj._agents.filter(obj._mask)["unique_id"]) + # Update the indices of the agents in the final DataFrame + final_indices = pl.concat( + [final_indices, obj._agents["unique_id"]], how="vertical" + ) + final_df = pl.concat(final_dfs, how="diagonal_relaxed") + final_active_index = pl.concat(final_active_indices, how="vertical") + else: + final_df = pl.concat( + [obj._agents for obj in agentsets], how="diagonal_relaxed" + ) + final_active_index = pl.concat( + [obj._agents.filter(obj._mask)["unique_id"] for obj in agentsets] + ) + final_df = final_df.sort("unique_id") + final_mask = final_df["unique_id"].is_in(final_active_index) + new_obj = self._get_obj(inplace=False) + new_obj._agents = final_df + new_obj._mask = final_mask + if not isinstance(original_masked_index, type(None)): + ids_to_remove = original_masked_index.filter( + original_masked_index.is_in(new_obj._agents["unique_id"]).not_() + ) + if not ids_to_remove.is_empty(): + new_obj.remove(ids_to_remove, inplace=True) + return new_obj + def _get_bool_mask( self, mask: PolarsMaskLike = None, @@ -379,7 +430,7 @@ def _get_masked_df( self, mask: PolarsMaskLike = None, ) -> pl.DataFrame: - if (isinstance(mask, pl.Series) and mask.dtype == bool) or isinstance( + if (isinstance(mask, pl.Series) and mask.dtype == pl.Boolean) or isinstance( mask, pl.Expr ): return self._agents.filter(mask) @@ -414,6 +465,25 @@ def _get_masked_df( mask_df = mask_series.to_frame("unique_id") return mask_df.join(self._agents, on="unique_id", how="left") + @overload + def _get_obj_copy(self, obj: pl.Series) -> pl.Series: ... + + @overload + def _get_obj_copy(self, obj: pl.DataFrame) -> pl.DataFrame: ... + + def _get_obj_copy(self, obj: pl.Series | pl.DataFrame) -> pl.Series | pl.DataFrame: + return obj.clone() + + def _update_mask( + self, original_active_indices: pl.Series, new_indices: pl.Series | None = None + ) -> None: + if new_indices is not None: + self._mask = self._agents["unique_id"].is_in( + original_active_indices + ) | self._agents["unique_id"].is_in(new_indices) + else: + self._mask = self._agents["unique_id"].is_in(original_active_indices) + def __getattr__(self, key: str) -> pl.Series: super().__getattr__(key) return self._agents[key] @@ -484,3 +554,7 @@ def active_agents(self, mask: PolarsMaskLike) -> None: @property def inactive_agents(self) -> pl.DataFrame: return self.agents.filter(~self._mask) + + @property + def index(self) -> pl.Series: + return self._agents["unique_id"] diff --git a/mesa_frames/types.py b/mesa_frames/types.py index 499f19f..b3fec6b 100644 --- a/mesa_frames/types.py +++ b/mesa_frames/types.py @@ -23,6 +23,8 @@ DataFrame = pd.DataFrame | pl.DataFrame Series = pd.Series | pl.Series +Index = pd.Index | pl.Series BoolSeries = pd.Series | pl.Series MaskLike = AgnosticMask | PandasMaskLike | PolarsMaskLike IdsLike = AgnosticIds | PandasIdsLike | PolarsIdsLike +TimeT = float | int diff --git a/tests/test_agentset_pandas.py b/tests/test_agentset_pandas.py index cd15948..02bc79e 100644 --- a/tests/test_agentset_pandas.py +++ b/tests/test_agentset_pandas.py @@ -123,9 +123,17 @@ def test_discard(self, fix1_AgentSetPandas: ExampleAgentSet): def test_do(self, fix1_AgentSetPandas: ExampleAgentSet): agents = fix1_AgentSetPandas + + # Test with no_mask agents.do("add_wealth", 1) assert agents.agents.wealth.tolist() == [2, 3, 4, 5] assert agents.do("add_wealth", 1, return_results=True) == None + assert agents.agents.wealth.tolist() == [3, 4, 5, 6] + + # Test with a mask + agents.do("add_wealth", 1, mask=agents["wealth"] > 3) + assert agents.agents.wealth.tolist() == [3, 5, 6, 7] + def test_get(self, fix1_AgentSetPandas: ExampleAgentSet): agents = fix1_AgentSetPandas @@ -149,7 +157,6 @@ def test_remove(self, fix1_AgentSetPandas: ExampleAgentSet): assert agents.agents.index.tolist() == [2, 3] with pytest.raises(KeyError) as e: agents.remove([1]) - assert "1" in str(e) def test_select(self, fix1_AgentSetPandas: ExampleAgentSet): agents = fix1_AgentSetPandas diff --git a/tests/test_agentset_polars.py b/tests/test_agentset_polars.py index 8ed073e..234daae 100644 --- a/tests/test_agentset_polars.py +++ b/tests/test_agentset_polars.py @@ -118,9 +118,18 @@ def test_discard(self, fix1_AgentSetPolars: ExampleAgentSet): def test_do(self, fix1_AgentSetPolars: ExampleAgentSet): agents = fix1_AgentSetPolars + + # Test with no return_results, no mask agents.do("add_wealth", 1) assert agents.agents["wealth"].to_list() == [2, 3, 4, 5] + + # Test with return_results=True, no mask assert agents.do("add_wealth", 1, return_results=True) is None + assert agents.agents["wealth"].to_list() == [3, 4, 5, 6] + + # Test with a mask + agents.do("add_wealth", 1, mask=agents["wealth"] > 3) + assert agents.agents["wealth"].to_list() == [3, 5, 6, 7] def test_get(self, fix1_AgentSetPolars: ExampleAgentSet): agents = fix1_AgentSetPolars From d3cc2aa3b1a4f778e1b08e78665a109fbc616192 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 2 Jul 2024 10:06:28 +0000 Subject: [PATCH 02/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/api/mesa_frames.html | 8 ++++---- docs/api/mesa_frames/agent.html | 6 +++--- docs/api/mesa_frames/model.html | 18 +++++++++--------- docs/scripts/readme_plot.py | 1 - mesa_frames/abstract/agents.py | 18 +++++++----------- mesa_frames/concrete/agents.py | 7 ++----- mesa_frames/concrete/agentset_pandas.py | 10 ++-------- mesa_frames/concrete/agentset_polars.py | 12 +++--------- mesa_frames/concrete/model.py | 3 ++- mesa_frames/datacollection.py | 1 + mesa_frames/types.py | 3 ++- tests/test_agentset_pandas.py | 6 ++---- 12 files changed, 37 insertions(+), 56 deletions(-) diff --git a/docs/api/mesa_frames.html b/docs/api/mesa_frames.html index ed2b1d9..52f4048 100644 --- a/docs/api/mesa_frames.html +++ b/docs/api/mesa_frames.html @@ -42,10 +42,10 @@

Submodules

mesa_frames

- - - - + + + +