-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathagents.py
148 lines (134 loc) · 5.78 KB
/
agents.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import time
import uuid
import random
class Agent:
    """Base class for agents that communicate through a shared context dict.

    The context is a plain dict whose ``"messages"`` entry is a list of
    message objects. Subclasses implement :meth:`process`; :meth:`act` runs
    one read -> process -> write cycle.
    """

    def __init__(self, agent_id):
        """Store the identifier this agent stamps on the messages it emits."""
        self.agent_id = agent_id

    def read_context(self, context):
        """Return the context's message list, or an empty list if absent."""
        # For now, simply return the list of messages.
        return context.get("messages", [])

    def process(self, messages):
        """Turn the visible messages into a new message, or a falsy value.

        Raises:
            NotImplementedError: always; subclasses must override this.
        """
        raise NotImplementedError

    def write_context(self, context, message):
        """Append ``message`` to the context, creating the list if needed."""
        # read_context tolerates a context without "messages"; do the same
        # here so acting on a fresh, empty context does not raise KeyError.
        context.setdefault("messages", []).append(message)

    def act(self, context):
        """Run one cycle: read messages, process them, publish the result.

        Nothing is written when :meth:`process` returns a falsy value.
        """
        local_messages = self.read_context(context)
        message = self.process(local_messages)
        if message:
            self.write_context(context, message)
# Basic GeneratorAgent: Generates a random proposal.
class GeneratorAgent(Agent):
    """Agent that emits a randomly numbered hypothesis on every turn."""

    def process(self, messages):
        """Build a fresh proposal message; ``messages`` is not consulted.

        Returns:
            A dict with a new UUID string ``id``, this agent's ``agent_id``,
            type ``"proposal"``, content ``"Hypothesis_<n>"`` with ``n`` drawn
            from 1..100 inclusive, ``"normal"`` priority, the current
            ``time.time()`` timestamp and a confidence drawn uniformly
            between 0.5 and 1.0.
        """
        hypothesis_number = random.randint(1, 100)
        return {
            "id": str(uuid.uuid4()),
            "agent_id": self.agent_id,
            "type": "proposal",
            "content": f"Hypothesis_{hypothesis_number}",
            "priority": "normal",
            "timestamp": time.time(),
            "confidence": random.uniform(0.5, 1.0),
        }
# VerifierAgent: Chooses a proposal and either verifies or rejects it.
class VerifierAgent(Agent):
    """Agent that randomly verifies or rejects one existing proposal."""

    def process(self, messages):
        """Judge one randomly chosen proposal from ``messages``.

        Returns:
            ``None`` when ``messages`` holds no proposal. Otherwise a
            ``"verification"`` message whose content is
            ``"Verified: <proposal>"`` or ``"Rejected: <proposal>"`` and whose
            ``ref_id`` is the id of the judged proposal.
        """
        candidates = [entry for entry in messages if entry["type"] == "proposal"]
        if not candidates:
            return None

        target = random.choice(candidates)
        # A draw above 0.3 counts as a pass, so roughly 70% are verified.
        if random.random() > 0.3:
            verdict = "Verified"
        else:
            verdict = "Rejected"

        return {
            "id": str(uuid.uuid4()),
            "agent_id": self.agent_id,
            "type": "verification",
            "content": f"{verdict}: {target['content']}",
            "priority": "high",
            "timestamp": time.time(),
            "confidence": random.uniform(0.5, 1.0),
            "ref_id": target["id"],  # Link back to the proposal.
        }
# SynthesizerAgent: Combines proposals into a meta-hypothesis.
class SynthesizerAgent(Agent):
    """Agent that merges two existing proposals into a meta-hypothesis."""

    def process(self, messages):
        """Combine two different proposal messages into one synthesis.

        Returns:
            ``None`` when fewer than two proposals are present. Otherwise a
            ``"synthesis"`` message whose content is
            ``"MetaHypothesis: (<first>) + (<second>)"``.
        """
        proposals = [msg["content"] for msg in messages if msg["type"] == "proposal"]
        if len(proposals) < 2:
            return None

        # Sample without replacement: two independent random.choice() calls
        # could pick the same proposal and "combine" it with itself.
        first, second = random.sample(proposals, 2)
        return {
            "id": str(uuid.uuid4()),
            "agent_id": self.agent_id,
            "type": "synthesis",
            "content": f"MetaHypothesis: ({first}) + ({second})",
            "priority": "normal",
            "timestamp": time.time(),
            "confidence": random.uniform(0.6, 1.0),
        }
# RLGeneratorAgent: Uses Q-learning to decide what type of proposal to generate.
class RLGeneratorAgent(Agent):
    """Proposal generator that picks a proposal flavour with tabular Q-learning.

    The state is a coarse bucket of how many proposals are on the board, the
    action is one of ``"low"``, ``"medium"`` or ``"high"``, and the reward
    comes from verification messages that reference this agent's proposals.
    """

    def __init__(self, agent_id, epsilon=0.2, alpha=0.1, gamma=0.9):
        """Set up an empty Q-table and the learning hyper-parameters."""
        super().__init__(agent_id)
        self.epsilon = epsilon  # Exploration rate.
        self.alpha = alpha  # Learning rate.
        self.gamma = gamma  # Discount factor.
        # Possible actions representing proposal types.
        self.actions = ["low", "medium", "high"]
        # Q-table: keys are (state, action) tuples, values are Q-values.
        self.q_table = {}
        # Mapping from proposal id to the (state, action) that produced it.
        self.last_actions = {}

    def _discretize_state(self, messages):
        """Bucket the proposal count: 0 for <3, 1 for 3..5, 2 for more."""
        proposal_count = sum(1 for entry in messages if entry["type"] == "proposal")
        if proposal_count < 3:
            return 0
        if proposal_count <= 5:
            return 1
        return 2

    def _choose_action(self, state):
        """Pick an action epsilon-greedily for ``state``."""
        exploring = random.random() < self.epsilon
        if exploring:
            return random.choice(self.actions)
        # Exploit: best known action; unseen pairs count as 0 and ties go to
        # the earliest entry in self.actions.
        return max(self.actions, key=lambda act: self.q_table.get((state, act), 0))

    def process(self, messages):
        """Emit a proposal tagged with the chosen action and remember it.

        Returns:
            A ``"proposal"`` message whose content is
            ``"Hypothesis_<action>_<n>"``; its id is recorded in
            ``last_actions`` so a later verification can be credited.
        """
        state = self._discretize_state(messages)
        action = self._choose_action(state)
        hypothesis_number = random.randint(1, 100)
        message = {
            "id": str(uuid.uuid4()),
            "agent_id": self.agent_id,
            "type": "proposal",
            "content": f"Hypothesis_{action}_{hypothesis_number}",
            "priority": "normal",
            "timestamp": time.time(),
            "confidence": random.uniform(0.5, 1.0),
        }
        # Store the (state, action) pair for later Q-table updates.
        self.last_actions[message["id"]] = (state, action)
        return message

    def update_q_values(self, context):
        """Apply a Q-update for each verification of a pending proposal.

        Scans every message in ``context``; a verification whose ``ref_id``
        matches a remembered proposal yields reward +1 if its content
        contains ``"Verified"`` and -1 otherwise. The bootstrap term uses the
        same state the action was taken in. Each proposal is credited at most
        once.
        """
        for entry in context.get("messages", []):
            if entry["type"] != "verification" or "ref_id" not in entry:
                continue
            proposal_id = entry["ref_id"]
            if proposal_id not in self.last_actions:
                continue

            state, action = self.last_actions[proposal_id]
            # Reward: +1 for Verified, -1 for Rejected.
            if "Verified" in entry["content"]:
                reward = 1
            else:
                reward = -1

            old_q = self.q_table.get((state, action), 0)
            best_q = max(self.q_table.get((state, act), 0) for act in self.actions)
            td_target = reward + self.gamma * best_q
            self.q_table[(state, action)] = old_q + self.alpha * (td_target - old_q)
            # Remove the entry so that the same proposal is not updated again.
            del self.last_actions[proposal_id]

    def report_q_table(self):
        """Print every learned (state, action) Q-value to stdout."""
        print(f"\n[{self.agent_id}] Q-table:")
        for pair, q_value in self.q_table.items():
            print(f" State {pair[0]}, Action {pair[1]}: Q-value = {q_value:.3f}")