Move support agent configuration into time and place module and minor improvements

- Make clear prices are integers
- Add extra parametrisation for prices
- Update the haggling scenario accordingly

PiperOrigin-RevId: 696896500
Change-Id: I06e8d41a6f8c753dcb52ab824a26ab04103c9a68
google-deepmind · Nov 15, 2024 · 9ca553f · 9ca553f
1 parent 4c56926
commit 9ca553f
Show file tree

Hide file tree

Showing 2 changed files with 76 additions and 26 deletions.
diff --git a/examples/modular/environment/haggling.py b/examples/modular/environment/haggling.py
@@ -22,7 +22,7 @@
 import functools
 import random
 import types
-from typing import Union
+from typing import Union, Any
 
 from concordia.agents import entity_agent_with_logging
 from concordia.associative_memory import associative_memory
@@ -79,9 +79,13 @@ class WorldConfig:
     num_supporting_players: The number of supporting players in the scenario.
     only_match_with_support: Whether to only match with supporting players.
     buyer_base_reward_min: The minimum base reward for the buyer.
+    buyer_base_reward_max: The maximum base reward for the buyer.
+    seller_base_reward_min: The minimum base reward for the seller.
     seller_base_reward_max: The maximum base reward for the seller.
     num_games: The number of games to play.
     num_main_players: The number of main players in the scenario.
+    supporting_player_parameters: The parameters to be passed to the supporting
+      players factory build_agent function.
     random_seed: The random seed for the random number generator.
   """
 
@@ -96,9 +100,12 @@ class WorldConfig:
   num_supporting_players: int = 0
   only_match_with_support: bool = False
   buyer_base_reward_min: int = 5
+  buyer_base_reward_max: int = 6
+  seller_base_reward_min: int = 1
   seller_base_reward_max: int = 2
   num_games: int = 2
   num_main_players: int = 3
+  supporting_player_parameters: dict[str, Any] | None = None
   random_seed: int = 42
 
 
@@ -133,16 +140,23 @@ def get_shared_memories_and_context(premise: str) -> tuple[Sequence[str], str]:
   shared_memories = [
       'Fruits are sold by weight.',
       (
-          'The price of one kilogram of fruit is, on average, 3 coins. 1 coin'
-          ' is really cheap and 5 coins is really expensive.'
+          'The price of one kilogram of fruit is, on average, 3 coins. 1 coin '
+          'is really cheap and 5 coins is really expensive. The smallest value '
+          'of transaction is 1 coin, all prices have to be in multiples of 1 '
+          'coin. No fractional values are allowed.'
       ),
   ]
   shared_context = premise
   return shared_memories, shared_context
 
 
 def configure_player(
-    name: str, gender: str, year: int, is_main: bool, rng: random.Random
+    name: str,
+    gender: str,
+    year: int,
+    is_main: bool,
+    rng: random.Random,
+    supporting_player_parameters: dict[str, Any] | None = None,
 ):
   """Configure a player.
 
@@ -152,6 +166,7 @@ def configure_player(
     year: the year of the simulation to sample the age of the players
     is_main: whether the player is a main character or not
     rng: the random number generator to use
+    supporting_player_parameters: the parameters for the supporting player
 
   Returns:
     config: the config for the player
@@ -160,16 +175,21 @@ def configure_player(
       'player_specific_memories': [f'{name} always drives a hard bargain.'],
       'main_character': is_main,
   }
-  if not is_main:
-    extras['fixed_response_by_call_to_action'] = {
-        f'Would {name} accept the offer?:': 'accept',
-        f'What price would {name} propose?:': '3 coins',
+  if not is_main and supporting_player_parameters:
+
+    fixed_response = {
+        key: value.format(name=name)
+        for key, value in supporting_player_parameters[
+            'fixed_response_by_call_to_action'
+        ].items()
     }
-    extras['specific_memories'] = [
-        f'{name} does not care about the price. {name} will accept any offer!'
-        ' They are very vocal about it and will not haggle and will praise any'
-        ' offer.'
-    ]
+    specific_memories = []
+
+    for memory in supporting_player_parameters['specific_memories']:
+      specific_memories.append(memory.format(name=name))
+
+    extras['fixed_response_by_call_to_action'] = fixed_response
+    extras['specific_memories'] = specific_memories
 
   return formative_memories.AgentConfig(
       name=name,
@@ -232,6 +252,7 @@ def configure_players(
         sampled_settings.year,
         is_main=False,
         rng=rng,
+        supporting_player_parameters=sampled_settings.supporting_player_parameters,
     )
 
     player_configs.append(config)
@@ -429,8 +450,14 @@ def configure_scenes(
 
   for i in range(sampled_settings.num_games * len(pairs)):
 
-    buyer_base_reward = rng.randint(sampled_settings.buyer_base_reward_min, 6)
-    seller_base_reward = rng.randint(1, sampled_settings.seller_base_reward_max)
+    buyer_base_reward = rng.randint(
+        sampled_settings.buyer_base_reward_min,
+        sampled_settings.buyer_base_reward_max,
+    )
+    seller_base_reward = rng.randint(
+        sampled_settings.seller_base_reward_min,
+        sampled_settings.seller_base_reward_max,
+    )
 
     this_game_players = pairs[i % len(pairs)]
 
@@ -690,26 +717,32 @@ def __init__(
               player_config.name
           ),
       )
-      explicit_preference = agent_components.constant.Constant(
-          pre_act_key='Explicit preference',
-          state=(
-              f'{player_config.name} will accept any offer! They are very vocal'
-              ' about it and will not haggle and will praise any offer.'
-          ),
-      )
+      additional_components = {
+          'Guiding principle of good conversation': conversation_style
+      }
+      if (
+          'explciti_preference_component'
+          in sampled_settings.supporting_player_parameters
+      ):
+        explicit_preference = agent_components.constant.Constant(
+            pre_act_key='Explicit preference',
+            state=sampled_settings.supporting_player_parameters[
+                'explciti_preference_component'
+            ],
+        )
+        additional_components['Explicit preference'] = explicit_preference
+
       player = self._build_supporting_agent(
           config=player_config,
           model=self._model,
           memory=self._all_memories[player_config.name],
           clock=self._clock,
           update_time_interval=MAJOR_TIME_STEP,
-          additional_components={
-              'Guiding principle of good conversation': conversation_style,
-              'Explicit preference': explicit_preference,
-          },
+          additional_components=additional_components,
           fixed_response_by_call_to_action=player_config.extras[
               'fixed_response_by_call_to_action'
           ],
+          search_in_prompt=True,
       )
       supporting_players.append(player)
 

diff --git a/examples/modular/environment/modules/fruitville_haggling_gullible.py b/examples/modular/environment/modules/fruitville_haggling_gullible.py
@@ -123,7 +123,24 @@ def sample_parameters(seed: int | None = None):
       buyer_base_reward_min=6,
       seller_base_reward_max=1,
       only_match_with_support=True,
+      supporting_player_parameters={
+          "fixed_response_by_call_to_action": {
+              "Would {name} accept the offer?:": "accept",
+              "What price would {name} propose?:": "3 coins",
+          },
+          "specific_memories": [
+              "{name} does not care about the price. {name} will accept any"
+              " offer! They are very vocal about it and will not haggle and"
+              " will praise any offer."
+          ],
+          "explciti_preference_component": (
+              "{name} does not care about the price. {name} will accept any"
+              " offer! They are very vocal about it and will not haggle and"
+              " will praise any offer."
+          ),
+      },
   )
+
   all_names = list(MALE_NAMES) + list(FEMALE_NAMES)
   rng = random.Random(config.random_seed)
   rng.shuffle(all_names)