From 6bdce1d5d9a10a674a48f1d06910837afc219df2 Mon Sep 17 00:00:00 2001 From: chenweize1998 Date: Tue, 10 Oct 2023 16:26:26 +0800 Subject: [PATCH] refactor: reorganize script location --- .github/workflows/test.yml | 2 +- .gitignore | 3 ++- .../rules/executor/coverage_test.py | 14 +++++++++----- .../tasksolving/tool_using/24point/config.yaml | 2 +- .../tasks/tasksolving/tool_using/bmi/config.yaml | 2 +- .../tasksolving/tool_using/bookclub/config.yaml | 2 +- .../tasks/tasksolving/tool_using/car/config.yaml | 2 +- .../tasks/tasksolving/tool_using/date/config.yaml | 2 +- .../tasks/tasksolving/tool_using/diy/config.yaml | 2 +- .../tasks/tasksolving/tool_using/party/config.yaml | 2 +- .../tasksolving/tool_using/sudoku/config.yaml | 2 +- .../tasksolving/tool_using/tools_simplified.json | 0 .../tasksolving/tool_using/trending/config.yaml | 2 +- .../tasksolving/tool_using/vacation/config.yaml | 2 +- scripts/__init__.py | 0 .../evaluate_commongen.py | 0 evaluate_logic.py => scripts/evaluate_logic.py | 0 evaluate_math.py => scripts/evaluate_math.py | 0 .../evaluate_responsegen.py | 0 test_pokemon_env.py | 4 ---- 20 files changed, 22 insertions(+), 21 deletions(-) rename tools_simplified.json => agentverse/tasks/tasksolving/tool_using/tools_simplified.json (100%) create mode 100644 scripts/__init__.py rename evaluate_commongen.py => scripts/evaluate_commongen.py (100%) rename evaluate_logic.py => scripts/evaluate_logic.py (100%) rename evaluate_math.py => scripts/evaluate_math.py (100%) rename evaluate_responsegen.py => scripts/evaluate_responsegen.py (100%) delete mode 100644 test_pokemon_env.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index bf14b09df..e593f0609 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -38,4 +38,4 @@ jobs: run: | python setup.py develop python agentverse_command/benchmark.py --task tasksolving/mgsm/gpt-3.5 --dataset_path data/mgsm/test_sample.jsonl --overwrite --output_path ci_smoke_test_output --tasks_dir ./agentverse/tasks - python evaluate_math.py --path ci_smoke_test_output/results.jsonl --ci_smoke_test \ No newline at end of file + python scripts/evaluate_math.py --path ci_smoke_test_output/results.jsonl --ci_smoke_test \ No newline at end of file diff --git a/.gitignore b/.gitignore index 66b07b913..bbfb5aec0 100644 --- a/.gitignore +++ b/.gitignore @@ -172,4 +172,5 @@ raw/ results tmp/ data/toolbench -logs/ \ No newline at end of file +logs/ +ci_smoke_test_output/ \ No newline at end of file diff --git a/agentverse/environments/tasksolving_env/rules/executor/coverage_test.py b/agentverse/environments/tasksolving_env/rules/executor/coverage_test.py index ce3938707..37c3073ba 100644 --- a/agentverse/environments/tasksolving_env/rules/executor/coverage_test.py +++ b/agentverse/environments/tasksolving_env/rules/executor/coverage_test.py @@ -29,16 +29,18 @@ def step( *args, **kwargs, ) -> Any: - from evaluate_commongen import scoring + from scripts.evaluate_commongen import scoring - coverage, missing_tokens = scoring([s.content for s in solution], [task_description]) + coverage, missing_tokens = scoring( + [s.content for s in solution], [task_description] + ) if len(missing_tokens[0]) == 0: missing_tokens = "No missing tokens." else: missing_tokens = ", ".join(missing_tokens[0]) result = f"Coverage: {coverage*100:.2f}%\nMissing Tokens: {missing_tokens}" return [ExecutorMessage(content=result)] - + async def astep( self, agent: ExecutorAgent, @@ -47,9 +49,11 @@ async def astep( *args, **kwargs, ) -> Any: - from evaluate_commongen import scoring + from scripts.evaluate_commongen import scoring - coverage, missing_tokens = scoring([s.content for s in solution], [task_description]) + coverage, missing_tokens = scoring( + [s.content for s in solution], [task_description] + ) if len(missing_tokens[0]) == 0: missing_tokens = "No missing tokens." else: diff --git a/agentverse/tasks/tasksolving/tool_using/24point/config.yaml b/agentverse/tasks/tasksolving/tool_using/24point/config.yaml index 11a185f3b..4e65b3f5b 100644 --- a/agentverse/tasks/tasksolving/tool_using/24point/config.yaml +++ b/agentverse/tasks/tasksolving/tool_using/24point/config.yaml @@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 3 cnt_tool_agents: &cnt_tool_agents 2 max_rounds: &max_rounds 5 max_criticizing_rounds: 3 -tool_config: &tool_config tools_simplified.json +tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json task_description: Recently, it has become popular in the AI field to verify the mathematical reasoning abilities of large language models by observing if they can solve the "24-Point Game." What is this game? Does it have a code-based solution? If it does, provide a Python code along with test cases and test its functionality. What are some other similar games that can be used to test the models' mathematical reasoning abilities? diff --git a/agentverse/tasks/tasksolving/tool_using/bmi/config.yaml b/agentverse/tasks/tasksolving/tool_using/bmi/config.yaml index 95ac4cf6a..97411d782 100644 --- a/agentverse/tasks/tasksolving/tool_using/bmi/config.yaml +++ b/agentverse/tasks/tasksolving/tool_using/bmi/config.yaml @@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 3 cnt_tool_agents: &cnt_tool_agents 2 max_rounds: &max_rounds 5 max_criticizing_rounds: 3 -tool_config: &tool_config tools_simplified.json +tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json task_description: I want to lose 5kg in the next 2 months. I weigh 70kg, am 170cm tall, and my age is 25. Calculate my BMI and based on that, suggest a workout routine and daily calorie intake to help me achieve my goal. diff --git a/agentverse/tasks/tasksolving/tool_using/bookclub/config.yaml b/agentverse/tasks/tasksolving/tool_using/bookclub/config.yaml index 98385570b..abc03fe11 100644 --- a/agentverse/tasks/tasksolving/tool_using/bookclub/config.yaml +++ b/agentverse/tasks/tasksolving/tool_using/bookclub/config.yaml @@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 3 cnt_tool_agents: &cnt_tool_agents 2 max_rounds: &max_rounds 5 max_criticizing_rounds: 3 -tool_config: &tool_config tools_simplified.json +tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json task_description: I want to kick off a book club with my friends. Can you tell me the top 5 bestselling books this month, gather the content summary for each, and find online platforms where we can buy or borrow them? diff --git a/agentverse/tasks/tasksolving/tool_using/car/config.yaml b/agentverse/tasks/tasksolving/tool_using/car/config.yaml index 1df8ccd7e..4344c707e 100644 --- a/agentverse/tasks/tasksolving/tool_using/car/config.yaml +++ b/agentverse/tasks/tasksolving/tool_using/car/config.yaml @@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 4 cnt_tool_agents: &cnt_tool_agents 3 max_rounds: &max_rounds 5 max_criticizing_rounds: 3 -tool_config: &tool_config tools_simplified.json +tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json task_description: I am planning to buy a new car. Could you help me compare the features and prices of the latest models of Tesla, Ford, and Toyota? Include details about range, charging time, safety features, and after-sales service. Also, provide a brief analysis of the pros and cons of each car. diff --git a/agentverse/tasks/tasksolving/tool_using/date/config.yaml b/agentverse/tasks/tasksolving/tool_using/date/config.yaml index 4dc613e90..6e12f1746 100644 --- a/agentverse/tasks/tasksolving/tool_using/date/config.yaml +++ b/agentverse/tasks/tasksolving/tool_using/date/config.yaml @@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 4 cnt_tool_agents: &cnt_tool_agents 3 max_rounds: &max_rounds 5 max_criticizing_rounds: 3 -tool_config: &tool_config tools_simplified.json +tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json task_description: I am planning a date with my girlfriend this week, please search for a good movie theater and a restaurant near Tsinghua University in Beijing and recommend a good movie to watch. Please search the web. diff --git a/agentverse/tasks/tasksolving/tool_using/diy/config.yaml b/agentverse/tasks/tasksolving/tool_using/diy/config.yaml index 8ea9b8ea3..8fa2f173c 100644 --- a/agentverse/tasks/tasksolving/tool_using/diy/config.yaml +++ b/agentverse/tasks/tasksolving/tool_using/diy/config.yaml @@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 4 cnt_tool_agents: &cnt_tool_agents 3 max_rounds: &max_rounds 5 max_criticizing_rounds: 3 -tool_config: &tool_config tools_simplified.json +tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json task_description: I've recently taken an interest in DIY home projects. Search for beginner-friendly DIY projects that can be completed over the weekend. Also, provide a list of materials required and a step-by-step guide for each project. diff --git a/agentverse/tasks/tasksolving/tool_using/party/config.yaml b/agentverse/tasks/tasksolving/tool_using/party/config.yaml index 76134a374..df7fad0bb 100644 --- a/agentverse/tasks/tasksolving/tool_using/party/config.yaml +++ b/agentverse/tasks/tasksolving/tool_using/party/config.yaml @@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 4 cnt_tool_agents: &cnt_tool_agents 3 max_rounds: &max_rounds 5 max_criticizing_rounds: 3 -tool_config: &tool_config tools_simplified.json +tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json task_description: I want to hold a party at somewhere around Tsinghua University tomorrow. I need you to look for some best places for holding a party nearby, and tell me whether the weather is good for holding a party tomorrow. Also, I want to know what activities can be considered in my party. Help me search the web. diff --git a/agentverse/tasks/tasksolving/tool_using/sudoku/config.yaml b/agentverse/tasks/tasksolving/tool_using/sudoku/config.yaml index e87547ca5..4d1202028 100644 --- a/agentverse/tasks/tasksolving/tool_using/sudoku/config.yaml +++ b/agentverse/tasks/tasksolving/tool_using/sudoku/config.yaml @@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 3 cnt_tool_agents: &cnt_tool_agents 2 max_rounds: &max_rounds 5 max_criticizing_rounds: 3 -tool_config: &tool_config tools_simplified.json +tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json task_description: I've just heard an interesting game called 'sudoku'. Can you search for the rules of this game and the solution to this game? Finally, write a python script to automatically solve this game if possible. diff --git a/tools_simplified.json b/agentverse/tasks/tasksolving/tool_using/tools_simplified.json similarity index 100% rename from tools_simplified.json rename to agentverse/tasks/tasksolving/tool_using/tools_simplified.json diff --git a/agentverse/tasks/tasksolving/tool_using/trending/config.yaml b/agentverse/tasks/tasksolving/tool_using/trending/config.yaml index b1685b131..101612774 100644 --- a/agentverse/tasks/tasksolving/tool_using/trending/config.yaml +++ b/agentverse/tasks/tasksolving/tool_using/trending/config.yaml @@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 4 cnt_tool_agents: &cnt_tool_agents 3 max_rounds: &max_rounds 5 max_criticizing_rounds: 3 -tool_config: &tool_config tools_simplified.json +tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json task_description: I'm currently analyzing what is popular on the website. Can you help me find the recent trending stuff. It could be anything, like trending news, products, books, movies, music, etc. Give a summarization for me. diff --git a/agentverse/tasks/tasksolving/tool_using/vacation/config.yaml b/agentverse/tasks/tasksolving/tool_using/vacation/config.yaml index 9f0c73c51..c10dd1ed0 100644 --- a/agentverse/tasks/tasksolving/tool_using/vacation/config.yaml +++ b/agentverse/tasks/tasksolving/tool_using/vacation/config.yaml @@ -2,7 +2,7 @@ cnt_agents: &cnt_agents 4 cnt_tool_agents: &cnt_tool_agents 3 max_rounds: &max_rounds 5 max_criticizing_rounds: 3 -tool_config: &tool_config tools_simplified.json +tool_config: &tool_config agentverse/tasks/tasksolving/tool_using/tools_simplified.json task_description: I'm planning a two-week vacation to Japan next month. Help me plan my itinerary. I want to visit Tokyo, Kyoto, and Osaka. Look for the top tourist attractions in each city, and also suggest the best mode of travel between these cities. Additionally, find out the weather forecast for the month I'll be visiting. diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/evaluate_commongen.py b/scripts/evaluate_commongen.py similarity index 100% rename from evaluate_commongen.py rename to scripts/evaluate_commongen.py diff --git a/evaluate_logic.py b/scripts/evaluate_logic.py similarity index 100% rename from evaluate_logic.py rename to scripts/evaluate_logic.py diff --git a/evaluate_math.py b/scripts/evaluate_math.py similarity index 100% rename from evaluate_math.py rename to scripts/evaluate_math.py diff --git a/evaluate_responsegen.py b/scripts/evaluate_responsegen.py similarity index 100% rename from evaluate_responsegen.py rename to scripts/evaluate_responsegen.py diff --git a/test_pokemon_env.py b/test_pokemon_env.py deleted file mode 100644 index b10e764e7..000000000 --- a/test_pokemon_env.py +++ /dev/null @@ -1,4 +0,0 @@ -import requests - -requests.post('http://127.0.0.1:10002/make_decision', headers={'Content-Type': 'application/json'}, json={'agent_ids': [0, 1, 2, 3, 4, 5]}) -# requests.post('http://127.0.0.1:10002/chat', headers={'Content-Type': 'application/json'}, json={'content': 'Hi!', 'receiver': 'May', 'receiver_id': 0, 'sender': 'Brendan'})