-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
226 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
from typing import Optional | ||
from trulens_eval import Feedback, Select | ||
from trulens_eval import Tru | ||
from trulens_eval import TruChain | ||
from trulens_eval.feedback import OpenAI as fOpenAI | ||
import numpy as np | ||
|
||
from src.planning.state import RefactoringAgentState, record_to_str | ||
|
||
tru = Tru() | ||
|
||
# These are to be used by the LLMController where the second query is the one that is used | ||
|
||
# Score returned by a feedback function that could not evaluate its input.
# Sentinel scores are dropped by create_sentinel_aggregator before aggregation.
sentinel = -1.0


def create_sentinel_aggregator(agg):
    """Wrap an aggregation function so that sentinel scores are ignored.

    Args:
        agg: Callable taking a list of scores and returning the aggregate
            (for example ``np.mean``).

    Returns:
        A callable that removes every score equal to ``sentinel`` from its
        input and passes the remaining scores to ``agg``. If nothing
        remains, ``agg`` is called with an empty list and decides the result.
    """

    def aggregator(values):
        # Compare by value, not identity: a score that was copied, converted
        # (e.g. to a NumPy scalar) or deserialized is still equal to the
        # sentinel but is no longer the same object.
        kept = [v for v in values if v != sentinel]
        return agg(kept)

    return aggregator
|
||
|
||
def create_tool_relevance_feedback(state):
    """Build a feedback that asks an LLM how relevant a tool selection was.

    Args:
        state: Agent state; interpolated verbatim into the prompt as STATE
            each time the feedback runs.

    Returns:
        A ``Feedback`` wrapping ``tool_relevance``, applied to the output and
        aggregated with ``np.mean`` over the non-sentinel scores.
    """

    def tool_relevance(output) -> float:
        """Score a tool selection; return ``sentinel`` when it cannot be scored.

        The prompt asks the model for a number between 0 and 10.
        """
        # Only a dict carrying both "tool" and "tool_input" can be scored.
        if (
            not isinstance(output, dict)
            or "tool" not in output
            or "tool_input" not in output
        ):
            return sentinel
        tool_id = output["tool"]
        tool_input = output["tool_input"]

        # Create the provider only once we know a request will be sent.
        provider = fOpenAI()
        completion = provider.endpoint.client.chat.completions.create(
            model="gpt-3.5-turbo",  # Use better model?
            messages=[
                {
                    "role": "system",
                    "content": "How relevant was the selection of TOOL with TOOL_INPUT in addressing the current task in STATE? Reply with a number between 0 and 10.",
                },
                {
                    "role": "user",
                    "content": f"TOOL: {tool_id}; TOOL_INPUT: {tool_input}; STATE: {state}",
                },
            ],
        )
        reply = completion.choices[0].message.content
        if isinstance(reply, str):
            # Tolerate surrounding whitespace and quotes around the number.
            reply = reply.strip().strip("'\"")
        try:
            return float(reply)
        except (TypeError, ValueError):
            # A missing or non-numeric reply cannot be scored; report it the
            # same way as an unscorable output so the aggregator drops it
            # instead of the whole feedback run failing.
            return sentinel

    f_tool_relevance = (
        Feedback(tool_relevance)
        .on_output()
        .aggregate(create_sentinel_aggregator(np.mean))
    )
    return f_tool_relevance
|
||
|
||
def create_short_thought_feedback():
    """Build a feedback that reports the length of the output thought.

    Returns:
        A ``Feedback`` wrapping ``short_thought``, applied to the output.
    """

    def short_thought(thought: str) -> float:
        # The score is simply the length of the thought, as a float.
        n_chars = len(thought)
        return float(n_chars)

    length_feedback = Feedback(short_thought)
    return length_feedback.on_output()
|
||
|
||
def create_evolving_thought_feedback(state: RefactoringAgentState):
    """Build a feedback scoring how much a new thought advances the goal.

    Args:
        state: Agent state. ``state["thoughts"]``, ``state["history"]`` and
            ``state["goal"]`` are read each time the feedback runs.

    Returns:
        A ``Feedback`` wrapping ``evolving_thought``, applied to the output.
    """

    def evolving_thought(thought: str) -> float:
        """Ask the model how much ``thought`` adds towards the goal.

        The prompt asks for a number between 0 and 100.

        Raises:
            ValueError: If the model's reply is not a number.
        """
        provider = fOpenAI()
        history = state["history"]
        # The last entry of state["thoughts"] is left out (presumably it is
        # the thought being scored - confirm against the caller).
        past_entries = [
            f"#T{i}: {past_thought}\n#A{i}: {record_to_str(history[i])}"
            for i, past_thought in enumerate(state["thoughts"][:-1])
        ]
        # Join into plain text; interpolating the list itself would put its
        # repr (brackets, quotes, escaped "\n") into the prompt.
        past_thoughts_actions = "\n".join(past_entries)
        completion = provider.endpoint.client.chat.completions.create(
            model="gpt-3.5-turbo",  # Use better model?
            messages=[
                {
                    "role": "system",
                    "content": "Given PAST_THOUGHTS_AND_ACTIONS, how much has the NEXT_THOUGHT added to solving the ULTIMATE_GOAL? Give a number between 0 to 100 where 100 means it has added a lot. Reply only with a number, for example: '80'",
                },
                {
                    "role": "user",
                    "content": f"### PAST_THOUGHTS_AND_ACTIONS ###\n {past_thoughts_actions}\n\n\n ### NEXT_THOUGHT ###\n {thought}\n\n\n ### ULTIMATE_GOAL ###\n {state['goal']}",
                },
            ],
        )
        reply = completion.choices[0].message.content
        if isinstance(reply, str):
            # The prompt's own example is quoted ('80'), so accept a reply
            # that echoes the quotes.
            reply = reply.strip().strip("'\"")
        return float(reply)

    return Feedback(evolving_thought).on_output()
|
||
|
||
def create_repeating_work_feedback(state: RefactoringAgentState):
    """Build a feedback scoring how much a new thought repeats earlier work.

    Args:
        state: Agent state. ``state["thoughts"]`` and ``state["history"]``
            are read each time the feedback runs.

    Returns:
        A ``Feedback`` wrapping ``repeated_work``, applied to the output.
    """

    def repeated_work(thought: str) -> float:
        """Ask the model how much ``thought`` repeats completed work.

        The prompt asks for a number between 0 and 100, where 100 means a
        complete repeat.

        Raises:
            ValueError: If the model's reply is not a number.
        """
        provider = fOpenAI()
        history = state["history"]
        # The last entry of state["thoughts"] is left out (presumably it is
        # the thought being scored - confirm against the caller).
        past_entries = [
            f"#T{i}: {past_thought}\n#A{i}: {record_to_str(history[i])}"
            for i, past_thought in enumerate(state["thoughts"][:-1])
        ]
        # Join into plain text; interpolating the list itself would put its
        # repr (brackets, quotes, escaped "\n") into the prompt.
        past_thoughts_actions = "\n".join(past_entries)
        completion = provider.endpoint.client.chat.completions.create(
            model="gpt-3.5-turbo",  # Use better model?
            messages=[
                {
                    "role": "system",
                    "content": "Given PAST_THOUGHTS_AND_ACTIONS, how much is the NEXT_THOUGHT suggesting we repeat work already completed? Give a number between 0 to 100 where 100 means it is suggesting a complete repeat of work already completed. Reply only with a number, for example: '80'",
                },
                {
                    "role": "user",
                    "content": f"### PAST_THOUGHTS_AND_ACTIONS ###\n {past_thoughts_actions}\n\n\n ### NEXT_THOUGHT ###\n {thought}",
                },
            ],
        )
        reply = completion.choices[0].message.content
        if isinstance(reply, str):
            # The prompt's own example is quoted ('80'), so accept a reply
            # that echoes the quotes.
            reply = reply.strip().strip("'\"")
        return float(reply)

    return Feedback(repeated_work).on_output()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.