Skip to content
This repository has been archived by the owner on Mar 16, 2024. It is now read-only.

Commit

Permalink
Feature/iterate on leetcode research (#489)
Browse files: browse the repository at this point in the history
* cleanup agent research

* Check in latest iterations

* Updating payload, cleaning up

* add study search, paper, and type cleanup
  • Loading branch information
emrgnt-cmplxty authored Aug 13, 2023
1 parent 3db1bf1 commit 8cfbf55
Show file tree
Hide file tree
Showing 28 changed files with 7,128 additions and 380 deletions.
8 changes: 7 additions & 1 deletion automata/agent/openai_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,13 @@ def _get_next_user_response(
content=f"{OpenAIAutomataAgent.OBSERVATION_MESSAGE}{result}\n{function_iteration_message}",
)
except Exception as e:
logger.info(f"Tool execution failed: {e}")
failure_message = f"Tool execution failed: {e}"
logger.info(failure_message)
return OpenAIChatMessage(
role="user",
content=failure_message,
)

return OpenAIChatMessage(
role="user",
content=f"{OpenAIAutomataAgent.CONTINUE_PREFIX}\n{self._get_iteration_status()}",
Expand Down
42 changes: 31 additions & 11 deletions automata/cli/scripts/run_tool_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import logging
from typing import List, Optional

from evalplus.data import write_jsonl

from automata.cli.cli_utils import initialize_py_module_loader
from automata.eval import (
SymbolSearchAction,
Expand Down Expand Up @@ -64,34 +66,52 @@ def run_eval_harness(
eval_loader.expected_actions,
tool_execution,
)
for result in output.results:
outputs = []
for counter, result in enumerate(output.results):
if isinstance(result, SymbolSearchEvalResult):
expected_action = result.expected_action
if not isinstance(expected_action, SymbolSearchAction):
raise ValueError(
"Expected action must be a SymbolSearchAction."
)

logger.debug(f"Search Query: {expected_action.query}")
logger.debug(
f"Truth Top Match: {expected_action.search_results[0]}\n"
)

logger.debug("- Observed Results - \n")
if observed_action := result.observed_action:
if not isinstance(observed_action, SymbolSearchAction):
raise ValueError(
"Observed action must be a SymbolSearchAction."
)

if not result.is_partial_match:
logger.debug("- Observed Results - \n")

logger.debug(f"Search Query: {expected_action.query}")
logger.debug(
f"Top {TOP_K_MATCHES} Search Results: {observed_action.search_results[:TOP_K_MATCHES]}\n"
f"Truth Top Match: {expected_action.search_results[0]}\n" # type: ignore
)
logger.debug(
f"Full Match: {result.is_full_match}\nPartial Match: {result.is_partial_match}"

logger.debug(
f"Top {TOP_K_MATCHES} Search Results: {observed_action.search_results[:TOP_K_MATCHES]}\n" # type: ignore
)

logger.debug(
f"Full Match: {result.is_full_match}\nPartial Match: {result.is_partial_match}"
)

logger.debug("=" * 150)
outputs.append(
{
"task_id": f"ContextCodeRetrieval/{counter}",
"query": expected_action.query,
"truth_top_match": expected_action.search_results[0], # type: ignore
"top_k_matches": observed_action.search_results[ # type: ignore
:TOP_K_MATCHES
],
"k": TOP_K_MATCHES,
}
)

logger.debug("=" * 150)
# TODO - Put output_filepath in commands.py upstream
write_jsonl(kwargs.get("output_filepath", "eval_results.jsonl"), outputs)
logger.debug(output)
logger.debug("=" * 150)

Expand Down
3,164 changes: 3,164 additions & 0 deletions automata/config/eval/de_duped_cleaned_single_target_search_payload.json

Large diffs are not rendered by default.

44 changes: 42 additions & 2 deletions automata/config/eval/demo_single_target_search_payload.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"template": {
"input_function": {
"type": "FunctionCall",
"name": "llm-facilitated-search",
"name": "symbol-similarity-search",
"arguments": {
"query": "{query}"
}
Expand All @@ -18,7 +18,47 @@
{
"query": "Which class is an abstract base class for building agents?",
"result": "automata.agent.agent.Agent"
}
},
{
"query": "Which class is an abstract base class for building agent tools?",
"result": "automata.agent.agent.AgentToolkitBuilder"
},
{
"query": "Which class enumerates the available agent tools?",
"result": "automata.agent.agent.AgentToolkitNames"
},
{
"query": "Which class represents a general agent error?",
"result": "automata.agent.error.AgentGeneralError"
},
{
"query": "Which class builds tools for an OpenAI agent?",
"result": "automata.agent.openai_agent.OpenAIAgentToolkitBuilder"
},
{
"query": "Which class manages an OpenAI agent lifecycle?",
"result": "automata.agent.openai_agent.OpenAIAutomataAgent"
},
{
"query": "Which method of the OpenAI agent is responsible for running the agent?",
"result": "automata.agent.openai_agent.OpenAIAutomataAgent.run"
},
{
"query": "Which private method does the OpenAI agent call to perform setup?",
"result": "automata.agent.openai_agent.OpenAIAutomataAgent._setup"
},
{
"query": "Which class is responsible for executing instructions and reporting results back to the main system?",
"result": "automata.agent.openai_agent.OpenAIAutomataAgent"
},
{
"query": "Which method of the OpenAI agent is responsible for executing a single iteration of the task?",
"result": "automata.agent.openai_agent.OpenAIAutomataAgent.__next__"
},
{
"query": "Which method of the OpenAI agent is responsible for running the agent and iterating through the tasks until a result is produced?",
"result": "automata.agent.openai_agent.OpenAIAutomataAgent.run"
}
]
}
]
Loading

0 comments on commit 8cfbf55

Please sign in to comment.