From 2bf6fb4c72a48addba562f349b2c29801126bb14 Mon Sep 17 00:00:00 2001
From: Haofei Yu <1125027232@qq.com>
Date: Tue, 26 Nov 2024 19:28:18 -0600
Subject: [PATCH 01/11] update to run researchtown

---
 configs/param.yaml                            |  2 +-
 research_bench/proposal_writing.py            | 16 ++++++++++++----
 research_bench/run_eval.py                    |  2 +-
 research_bench/run_eval.sh                    |  4 ++--
 .../envs/env_proposal_writing_without_rag.py  | 10 +++-------
 research_town/utils/sampler.py                |  5 ++++-
 6 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/configs/param.yaml b/configs/param.yaml
index 94bef813..46f13243 100644
--- a/configs/param.yaml
+++ b/configs/param.yaml
@@ -9,5 +9,5 @@ temperature: 0.6
 top_p: null
 write_proposal_strategy: default
 max_env_run_num: 1
-proposal_num: 2
+proposal_num: 1
 use_rag: True

diff --git a/research_bench/proposal_writing.py b/research_bench/proposal_writing.py
index 38ff556b..73b2af42 100644
--- a/research_bench/proposal_writing.py
+++ b/research_bench/proposal_writing.py
@@ -28,14 +28,20 @@ def write_proposal_researchtown(
         agent_manager=agent_manager,
     )

-    leader_profile = profile_db.get(name=profiles[0].name)[0]
-    print('leader_profile', leader_profile)
+    leader_profile = profiles[0]
     leader = agent_manager.create_agent(leader_profile, role='leader')
+    members = []
+    for member_profile in profiles[1:]:
+        member = agent_manager.create_agent(member_profile, role='member')
+        members.append(member)
     if not leader_profile:
         raise ValueError('Failed to create leader agent')

+    ref_contents = [ref for ref in ref_contents if ref is not None]
+    assert None not in ref_contents
     env.on_enter(
         leader=leader,
+        members=members,
         contexts=ref_contents,
     )

@@ -48,7 +54,8 @@ def write_proposal_researchtown(

     # Exit the environment and retrieve the generated proposal
     exit_status, exit_dict = env.on_exit()
-    proposal = exit_dict.get('proposal')
+    import pdb; pdb.set_trace()
+    proposal = exit_dict.get('proposals')[0]
     if proposal and proposal.content:
         return str(proposal.content)
     else:
@@ -154,6 +161,7 @@ def write_proposal_with_only_citations(ref_contents: List[str], config: Config)
         }
     ]
     response = model_prompting(config.param.base_llm, prompt, max_token_num=config.param.max_token_num)[0]
+    import pdb; pdb.set_trace()
     return response


@@ -334,7 +342,7 @@ def write_proposal(
         return write_proposal_with_profiles_and_citations(
             profiles=profiles, ref_contents=ref_contents, config=config
         )
-    elif mode == 'textgnn':
+    elif mode == 'research_town':
         return write_proposal_researchtown(
             profiles=profiles, ref_contents=ref_contents, config=config
         )

diff --git a/research_bench/run_eval.py b/research_bench/run_eval.py
index bd78351e..c2d7bbca 100644
--- a/research_bench/run_eval.py
+++ b/research_bench/run_eval.py
@@ -79,7 +79,7 @@ def main() -> None:
             'author_only',
             'citation_only',
             'author_citation',
-            'textgnn',
+            'research_town',
             'sakana_ai_scientist',
         ],
         help='Processing mode',

diff --git a/research_bench/run_eval.sh b/research_bench/run_eval.sh
index e8663b5c..80e1ae57 100755
--- a/research_bench/run_eval.sh
+++ b/research_bench/run_eval.sh
@@ -1,9 +1,9 @@
 #!/bin/bash

 # Define the input and output paths, along with the modes to test
-INPUT_PATH="./mlbench/mlbench.json"
+INPUT_PATH="./mlbench/mlbench_full.json"
 OUTPUT_DIR="./results"
-MODES=("citation_only")
+MODES=("research_town")
 NUM_PROCESSES=4

 # Loop through each mode and run the evaluation

diff --git a/research_town/envs/env_proposal_writing_without_rag.py b/research_town/envs/env_proposal_writing_without_rag.py
index 8ba970bc..3a7588fa 100644
--- a/research_town/envs/env_proposal_writing_without_rag.py
+++ b/research_town/envs/env_proposal_writing_without_rag.py
@@ -62,17 +62,13 @@ def run(self) -> Generator[Tuple[Progress, Agent], None, None]:
                 contexts=self.contexts,
                 config=self.config,
             )
-            yield insight, researcher
-            insights.append(insight)
-
-        # Step 3: Researchers brainstorm ideas based on their insights
-        for researcher in researchers:
-            idea = researcher.brainstorm_idea(insights=insights, config=self.config)
+            idea = researcher.brainstorm_idea(insights=[insight], config=self.config)
             yield idea, researcher
+            insights.append(insight)
             ideas.append(idea)

-        # Step 4: Leader summarizes ideas and writes proposals
+        # Step 2: Leader summarizes ideas and writes proposals
         idea_combos = sample_ideas(ideas, self.config.param.proposal_num)
         for idea_combo in idea_combos:
             summarized_idea = self.leader.summarize_idea(

diff --git a/research_town/utils/sampler.py b/research_town/utils/sampler.py
index 6eb87aff..956547ca 100644
--- a/research_town/utils/sampler.py
+++ b/research_town/utils/sampler.py
@@ -6,8 +6,11 @@

 def sample_ideas(lst: List[Idea], n: int) -> List[List[Idea]]:
     total_subsets = 2 ** len(lst) - (len(lst) + 1)
+    if len(lst) == 1:
+        return lst
     if n > total_subsets:
-        raise ValueError(f'n cannot be greater than {total_subsets}')
+        print(f'n cannot be greater than {total_subsets}')
+        n = total_subsets

     sampled_subsets: set[Tuple[int, ...]] = set()
     lst_len = len(lst)

From a49a361f152363adbb8c6906119d572aa14a3bb1 Mon Sep 17 00:00:00 2001
From: Haofei Yu <1125027232@qq.com>
Date: Tue, 26 Nov 2024 19:31:20 -0600
Subject: [PATCH 02/11] fix sampling error

---
 research_town/utils/sampler.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/research_town/utils/sampler.py b/research_town/utils/sampler.py
index 956547ca..7322682c 100644
--- a/research_town/utils/sampler.py
+++ b/research_town/utils/sampler.py
@@ -7,10 +7,9 @@
 def sample_ideas(lst: List[Idea], n: int) -> List[List[Idea]]:
     total_subsets = 2 ** len(lst) - (len(lst) + 1)
     if len(lst) == 1:
-        return lst
+        return [lst]
     if n > total_subsets:
-        print(f'n cannot be greater than {total_subsets}')
-        n = total_subsets
+        raise ValueError(f'n cannot be greater than {total_subsets}')

     sampled_subsets: set[Tuple[int, ...]] = set()
     lst_len = len(lst)

From a89e7a8094e2ef40d03a1118a7d9bb74190d1bc2 Mon Sep 17 00:00:00 2001
From: Haofei Yu <1125027232@qq.com>
Date: Tue, 26 Nov 2024 19:45:45 -0600
Subject: [PATCH 03/11] delete pdb

---
 research_bench/proposal_writing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/research_bench/proposal_writing.py b/research_bench/proposal_writing.py
index 73b2af42..913a1265 100644
--- a/research_bench/proposal_writing.py
+++ b/research_bench/proposal_writing.py
@@ -54,7 +54,6 @@ def write_proposal_researchtown(

     # Exit the environment and retrieve the generated proposal
     exit_status, exit_dict = env.on_exit()
-    import pdb; pdb.set_trace()
     proposal = exit_dict.get('proposals')[0]
     if proposal and proposal.content:
         return str(proposal.content)
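Note on PATCH 01/02: `sample_ideas` declares `List[List[Idea]]` as its return type, so the single-idea fast path has to wrap the input, which is exactly what PATCH 02's `return [lst]` restores; PATCH 01's `return lst` hands callers a flat `List[Idea]` that breaks iteration over idea combinations. Below is a minimal self-contained sketch of that contract. Only the signature, the subset-count formula, and the two guard clauses come from the patches; the sampling loop and the `Idea` stand-in are illustrative assumptions.

```python
import random
from typing import List

Idea = str  # stand-in for research_town.data.Idea, for illustration only

def sample_ideas(lst: List[Idea], n: int) -> List[List[Idea]]:
    # Subsets of size >= 2: 2^k minus the empty set and the k singletons.
    total_subsets = 2 ** len(lst) - (len(lst) + 1)
    if len(lst) == 1:
        return [lst]  # wrap so callers still receive a list of combinations
    if n > total_subsets:
        raise ValueError(f'n cannot be greater than {total_subsets}')
    sampled: set = set()
    while len(sampled) < n:
        size = random.randint(2, len(lst))
        sampled.add(tuple(sorted(random.sample(range(len(lst)), size))))
    return [[lst[i] for i in combo] for combo in sampled]

# Three ideas admit 2^3 - 4 = 4 multi-idea subsets, so any n <= 4 works here.
print(sample_ideas(['a', 'b', 'c'], 2))
```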
From 29f04f21ccdbc633dac9a80d86e20da8c762d642 Mon Sep 17 00:00:00 2001
From: Haofei Yu <1125027232@qq.com>
Date: Wed, 27 Nov 2024 02:38:26 -0600
Subject: [PATCH 04/11] commit

---
 research_bench/eval_only.py        | 42 +++++++++++++
 research_bench/nv_api.py           | 19 ++++++
 research_bench/proposal_writing.py | 96 ++++++++++++++++++++++++++++--
 research_bench/run_eval.py         |  4 +-
 research_bench/run_eval.sh         |  2 +-
 5 files changed, 156 insertions(+), 7 deletions(-)
 create mode 100644 research_bench/eval_only.py
 create mode 100644 research_bench/nv_api.py

diff --git a/research_bench/eval_only.py b/research_bench/eval_only.py
new file mode 100644
index 00000000..ad39dc67
--- /dev/null
+++ b/research_bench/eval_only.py
@@ -0,0 +1,42 @@
+import jsonlines
+from research_bench.eval import compute_proposal_metrics
+from tqdm import tqdm
+import json
+
+dataset = []
+with open('./results/mlbench_result_4o_mini_citation_only.jsonl', 'r') as f:
+    for line_num, line in enumerate(f, 1):
+        try:
+            obj = json.loads(line)
+            dataset.append(obj)
+        except json.JSONDecodeError as e:
+            print(f"Error decoding JSON on line {line_num}: {e}")
+            continue
+
+overall_metrics = {
+    'openai_sim': [],
+    'voyageai_sim': [],
+    'openai_sim_q1': [],
+    'openai_sim_q2': [],
+    'openai_sim_q3': [],
+    'openai_sim_q4': [],
+    'openai_sim_q5': [],
+    'voyageai_sim_q1': [],
+    'voyageai_sim_q2': [],
+    'voyageai_sim_q3': [],
+    'voyageai_sim_q4': [],
+    'voyageai_sim_q5': [],
+}
+
+for data in tqdm(dataset):
+    ref_proposal = data['ref_proposal']
+    gen_proposal = data['gen_proposal']
+    if 'openai_sim' not in data.keys():
+        print(data['paper_id'])
+    #metrics = compute_proposal_metrics(ref_proposal, gen_proposal)
+    #print(metrics)
+    for key in overall_metrics.keys():
+        overall_metrics[key].append(data[key])
+
+for key, values in overall_metrics.items():
+    print(f'{key}: {sum(values) / len(values)}')
\ No newline at end of file

diff --git a/research_bench/nv_api.py b/research_bench/nv_api.py
new file mode 100644
index 00000000..1d4a2b6a
--- /dev/null
+++ b/research_bench/nv_api.py
@@ -0,0 +1,19 @@
+from openai import OpenAI
+
+client = OpenAI(
+    base_url = "https://integrate.api.nvidia.com/v1",
+    api_key = "nvapi-<redacted>"
+)
+
+completion = client.chat.completions.create(
+    model="nvidia/nv-embed-v1",
+    messages=[{"role":"user","content":"Write a limerick about the wonders of GPU computing."}],
+    temperature=0.2,
+    top_p=0.7,
+    max_tokens=1024,
+    stream=True
+)
+
+for chunk in completion:
+    if chunk.choices[0].delta.content is not None:
+        print(chunk.choices[0].delta.content, end="")
\ No newline at end of file

diff --git a/research_bench/proposal_writing.py b/research_bench/proposal_writing.py
index 913a1265..c0820fce 100644
--- a/research_bench/proposal_writing.py
+++ b/research_bench/proposal_writing.py
@@ -1,5 +1,5 @@
 from typing import List
-
+import random
 from research_town.agents import AgentManager
 from research_town.configs import Config
 from research_town.data import Profile
@@ -129,7 +129,12 @@ def write_proposal_with_only_profiles(profiles: List[Profile], config: Config) -


 def write_proposal_with_only_citations(ref_contents: List[str], config: Config) -> str:
-    ref_strs = '\n'.join([ref for ref in ref_contents if ref is not None])
+    random.shuffle(ref_contents)
+    ref_strs = ''
+    for idx, ref in enumerate(ref_contents):
+        if ref is None:
+            continue
+        ref_strs += f'paper {idx + 1}. {ref}\n\n'

     prompt = [
         {
@@ -155,12 +160,11 @@ def write_proposal_with_only_citations(ref_contents: List[str], config: Config)
                 'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use.\n'
                 'Describe the expected outcomes. MAKE IT CLEAR.\n\n'
                 f'Contents collect from cited papers:\n{ref_strs}\n\n'
-                'Please provide the five core questions contents based on the above cited contents.'
+                'Please brainstorm a following proposal with the given format.'
             ),
         }
     ]
     response = model_prompting(config.param.base_llm, prompt, max_token_num=config.param.max_token_num)[0]
-    import pdb; pdb.set_trace()
     return response


@@ -322,12 +326,90 @@ def write_proposal_sakana_ai_scientist(
     else:
         return conversation[-1]['content'].split('I am done')[0]

+def write_proposal_debug(profiles: List[Profile], ref_contents: List[str], config: Config) -> str:
+    random.shuffle(ref_contents)
+    ref_strs = ''
+    for idx, ref in enumerate(ref_contents):
+        if ref is None:
+            continue
+        ref_strs += f'paper {idx + 1}. {ref}\n'
+    profile = profiles[0]
+
+    prompt = [
+        {
+            'role': 'user',
+            'content': (
+                'Here is a high-level summarized insight of a research field Machine Learning.\n\n'
+                'Here are the five core questions:\n\n'
+                '[Question 1] - What is the problem?\n\n'
+                'Formulate the specific research question you aim to address. Only output one question and do not include any more information.\n\n'
+                '[Question 2] - Why is it interesting and important?\n\n'
+                'Explain the broader implications of solving this problem for the research community.\n'
+                'Discuss how such paper will affect the future research.\n'
+                'Discuss how addressing this question could advance knowledge or lead to practical applications.\n\n'
+                '[Question 3] - Why is it hard?\n\n'
+                'Discuss the challenges and complexities involved in solving this problem.\n'
+                'Explain why naive or straightforward approaches may fail.\n'
+                'Identify any technical, theoretical, or practical obstacles that need to be overcome. MAKE IT CLEAR.\n\n'
+                "[Question 4] - Why hasn't it been solved before?\n\n"
+                'Identify gaps or limitations in previous research or existing solutions.\n'
+                'Discuss any barriers that have prevented this problem from being solved until now.\n'
+                'Explain how your approach differs from or improves upon prior work. MAKE IT CLEAR.\n\n'
+                '[Question 5] - What are the key components of my approach and results?\n\n'
+                'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use.\n'
+                'Describe the expected outcomes. MAKE IT CLEAR.\n\n'
+                f'Contents collect from cited papers:\n{ref_strs}\n\n'
+                'Please brainstorm a following proposal with the given format.'
+            ),
+        }
+    ]
+    generated_5q = model_prompting(config.param.base_llm, prompt, max_token_num=config.param.max_token_num)[0]
+    # delete all things after [Question 5]
+    generated_4q = generated_5q.split('[Question 5]')[0]
+
+    prompt = [
+        {
+            'role': 'user',
+            'content': (
+                'Here is a high-level summarized insight of a research field Machine Learning.\n\n'
+                'Here are the five core questions:\n\n'
+                '[Question 1] - What is the problem?\n\n'
+                'Formulate the specific research question you aim to address. Only output one question and do not include any more information.\n\n'
+                '[Question 2] - Why is it interesting and important?\n\n'
+                'Explain the broader implications of solving this problem for the research community.\n'
+                'Discuss how such paper will affect the future research.\n'
+                'Discuss how addressing this question could advance knowledge or lead to practical applications.\n\n'
+                '[Question 3] - Why is it hard?\n\n'
+                'Discuss the challenges and complexities involved in solving this problem.\n'
+                'Explain why naive or straightforward approaches may fail.\n'
+                'Identify any technical, theoretical, or practical obstacles that need to be overcome. MAKE IT CLEAR.\n\n'
+                "[Question 4] - Why hasn't it been solved before?\n\n"
+                'Identify gaps or limitations in previous research or existing solutions.\n'
+                'Discuss any barriers that have prevented this problem from being solved until now.\n'
+                'Explain how your approach differs from or improves upon prior work. MAKE IT CLEAR.\n\n'
+                '[Question 5] - What are the key components of my approach and results?\n\n'
+                'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use.\n'
+                'Describe the expected outcomes. MAKE IT CLEAR.\n\n'
+                f'Contents collect from cited papers:\n{ref_strs}\n\n'
+                'This is the generated [Question 1] to [Question 4] based on the citation papers.\n'
+                f'{generated_4q}\n\n'
+                'You are a researcher who the bio is as follows:\n'
+                f'{profile.bio}\n\n'
+                'When you are generating [Question 5], you can think how to do the thing based on your bio information.\n'
+                'Please brainstorm a following proposal with the given format. You should still start with [Question 1] to [Question 5]. But the content from [Question 1] to [Question 4] is already given and you just copy them as part of the output.'
+            ),
+        }
+    ]
+    response = model_prompting(config.param.base_llm, prompt, max_token_num=config.param.max_token_num)[0]
+    return response
+
+
 def write_proposal(
     mode: str,
     profiles: List[Profile],
     ref_contents: List[str],
     config: Config,
+    target_paper_title: str,
 ) -> str:
     if mode == 'zero_shot':
         return write_proposal_zero_shot(config=config)
@@ -335,7 +417,7 @@ def write_proposal(
         return write_proposal_with_only_profiles(profiles=profiles, config=config)
     elif mode == 'citation_only':
         return write_proposal_with_only_citations(
-            ref_contents=ref_contents, config=config
+            ref_contents=ref_contents, config=config,
         )
     elif mode == 'author_citation':
         return write_proposal_with_profiles_and_citations(
@@ -349,5 +431,9 @@ def write_proposal(
         return write_proposal_sakana_ai_scientist(
             ref_contents=ref_contents, config=config, num_reflections=5
         )
+    elif mode == 'debug':
+        return write_proposal_debug(
+            profiles=profiles, ref_contents=ref_contents, config=config,
+        )
     else:
         raise ValueError(f'Invalid proposal writing mode: {mode}')

diff --git a/research_bench/run_eval.py b/research_bench/run_eval.py
index c2d7bbca..d50ea8cd 100644
--- a/research_bench/run_eval.py
+++ b/research_bench/run_eval.py
@@ -25,7 +25,8 @@ def inference(
     profiles = [Profile(**data) for data in author_data.values()]
     ref_abstracts = [ref['abstract'] for ref in paper_data.get('references', [])]

-    gen_proposal = write_proposal(mode, profiles, ref_abstracts, config)
+    paper_title = paper_data['title']
+    gen_proposal = write_proposal(mode, profiles, ref_abstracts, config, paper_title)

     metrics = compute_proposal_metrics(ref_proposal, gen_proposal)
     results = {
@@ -81,6 +82,7 @@ def main() -> None:
             'author_citation',
             'research_town',
             'sakana_ai_scientist',
+            'debug'
         ],
         help='Processing mode',
     )

diff --git a/research_bench/run_eval.sh b/research_bench/run_eval.sh
index 80e1ae57..68513e9f 100755
--- a/research_bench/run_eval.sh
+++ b/research_bench/run_eval.sh
@@ -3,7 +3,7 @@
 # Define the input and output paths, along with the modes to test
 INPUT_PATH="./mlbench/mlbench_full.json"
 OUTPUT_DIR="./results"
-MODES=("debug")
+MODES=("fake_research_town")
 NUM_PROCESSES=4

 # Loop through each mode and run the evaluation
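Note on PATCH 04: `nv_api.py` was committed with a literal API key (redacted above); a credential that has landed in git history should be treated as leaked and rotated. Note also that `nvidia/nv-embed-v1` is an embedding model, so driving it through the chat-completions endpoint may be rejected. A sketch of the same client with the key read from the environment follows; the `NVAPI_KEY` variable name is an assumption, not something the patch defines.

```python
import os
from openai import OpenAI

# Read the key from the environment instead of committing it to the repo,
# and fail fast with a clear message when it is absent.
api_key = os.environ.get("NVAPI_KEY")
if not api_key:
    raise RuntimeError("Set NVAPI_KEY before running this script")

client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=api_key,
)
```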
From 7ec07f7fe5c142eeaf511caf83971e0daacd1e3f Mon Sep 17 00:00:00 2001
From: Haofei Yu <1125027232@qq.com>
Date: Wed, 27 Nov 2024 02:39:57 -0600
Subject: [PATCH 05/11] commit

---
 research_bench/proposal_writing.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/research_bench/proposal_writing.py b/research_bench/proposal_writing.py
index c0820fce..238028c0 100644
--- a/research_bench/proposal_writing.py
+++ b/research_bench/proposal_writing.py
@@ -129,6 +129,7 @@ def write_proposal_with_only_profiles(profiles: List[Profile], config: Config) -


 def write_proposal_with_only_citations(ref_contents: List[str], config: Config) -> str:
+    random.seed(0)
     random.shuffle(ref_contents)
     ref_strs = ''
     for idx, ref in enumerate(ref_contents):
@@ -327,6 +328,7 @@ def write_proposal_sakana_ai_scientist(
         return conversation[-1]['content'].split('I am done')[0]

 def write_proposal_debug(profiles: List[Profile], ref_contents: List[str], config: Config) -> str:
+    random.seed(0)
     random.shuffle(ref_contents)
     ref_strs = ''
     for idx, ref in enumerate(ref_contents):

From 2b7cbdeea74bf09f3f36f2a46f09ab303b9ab355 Mon Sep 17 00:00:00 2001
From: Haofei Yu <1125027232@qq.com>
Date: Wed, 27 Nov 2024 03:14:00 -0600
Subject: [PATCH 06/11] commit

---
 research_bench/eval_only.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/research_bench/eval_only.py b/research_bench/eval_only.py
index ad39dc67..d83b25a6 100644
--- a/research_bench/eval_only.py
+++ b/research_bench/eval_only.py
@@ -28,6 +28,7 @@
     'voyageai_sim_q5': [],
 }

+dataset = dataset[:100]
 for data in tqdm(dataset):
     ref_proposal = data['ref_proposal']
     gen_proposal = data['gen_proposal']

From 4cf20fcca6a4553dbb3f5938528cd3ee9ab97937 Mon Sep 17 00:00:00 2001
From: Haofei Yu <1125027232@qq.com>
Date: Wed, 27 Nov 2024 03:28:33 -0600
Subject: [PATCH 07/11] commit

---
 research_bench/eval_only.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/research_bench/eval_only.py b/research_bench/eval_only.py
index d83b25a6..b9c96375 100644
--- a/research_bench/eval_only.py
+++ b/research_bench/eval_only.py
@@ -4,7 +4,7 @@
 import json

 dataset = []
-with open('./results/mlbench_result_4o_mini_citation_only.jsonl', 'r') as f:
+with open('./results/mlbench_result_4o_mini_citation_only_consider_lower_than_debug.jsonl', 'r') as f:
     for line_num, line in enumerate(f, 1):
         try:
             obj = json.loads(line)
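Note on PATCH 05: calling `random.seed(0)` inside each writer makes the shuffle reproducible, but it reseeds the process-wide RNG, so every later `random` call in the same run becomes deterministic as a side effect. A local `random.Random` instance would confine the determinism to the shuffle; the sketch below is a suggested alternative, not what the patch does.

```python
import random
from typing import List, Optional

def shuffled_refs(ref_contents: List[Optional[str]], seed: int = 0) -> List[str]:
    # A dedicated Random instance keeps the deterministic shuffle from
    # altering global random state elsewhere in the process.
    rng = random.Random(seed)
    refs = [ref for ref in ref_contents if ref is not None]
    rng.shuffle(refs)
    return refs
```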
From 69801958c2a2eb8eee2f2be24bf1b9b5a97e2a95 Mon Sep 17 00:00:00 2001
From: Haofei Yu <1125027232@qq.com>
Date: Wed, 27 Nov 2024 07:12:59 -0600
Subject: [PATCH 08/11] update

---
 research_bench/eval_only.py        |   2 +-
 research_bench/proposal_writing.py | 165 +++++++++++++++++++++++++++--
 research_bench/run_eval.py         |   3 +-
 research_bench/run_eval.sh         |   2 +-
 4 files changed, 161 insertions(+), 11 deletions(-)

diff --git a/research_bench/eval_only.py b/research_bench/eval_only.py
index b9c96375..d83b25a6 100644
--- a/research_bench/eval_only.py
+++ b/research_bench/eval_only.py
@@ -4,7 +4,7 @@
 import json

 dataset = []
-with open('./results/mlbench_result_4o_mini_citation_only_consider_lower_than_debug.jsonl', 'r') as f:
+with open('./results/mlbench_result_4o_mini_citation_only.jsonl', 'r') as f:
     for line_num, line in enumerate(f, 1):
         try:
             obj = json.loads(line)

diff --git a/research_bench/proposal_writing.py b/research_bench/proposal_writing.py
index 238028c0..96e88e29 100644
--- a/research_bench/proposal_writing.py
+++ b/research_bench/proposal_writing.py
@@ -117,7 +117,7 @@ def write_proposal_with_only_profiles(profiles: List[Profile], config: Config) -
                 'Discuss any barriers that have prevented this problem from being solved until now.\n'
                 'Explain how your approach differs from or improves upon prior work. MAKE IT CLEAR.\n\n'
                 '[Question 5] - What are the key components of my approach and results?\n\n'
-                'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use.\n'
+                'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use. But you must include these in one paragraph and not use subtitles.\n'
                 'Describe the expected outcomes. MAKE IT CLEAR.\n\n'
                 f'Author biographies and personas:\n{bio_strs}\n\n'
                 'You are the profiles of this paper. Please provide the five core questions contents for a brand new future research based on the above biographies.'
@@ -158,7 +158,7 @@ def write_proposal_with_only_citations(ref_contents: List[str], config: Config)
                 'Discuss any barriers that have prevented this problem from being solved until now.\n'
                 'Explain how your approach differs from or improves upon prior work. MAKE IT CLEAR.\n\n'
                 '[Question 5] - What are the key components of my approach and results?\n\n'
-                'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use.\n'
+                'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use. But you must include these in one paragraph and not use subtitles.\n'
                 'Describe the expected outcomes. MAKE IT CLEAR.\n\n'
                 f'Contents collect from cited papers:\n{ref_strs}\n\n'
                 'Please brainstorm a following proposal with the given format.'
@@ -335,7 +335,7 @@ def write_proposal_debug(profiles: List[Profile], ref_contents: List[str], confi
         if ref is None:
             continue
         ref_strs += f'paper {idx + 1}. {ref}\n'
-    profile = profiles[0]
+    profile_str = '\n'.join([profile.bio for profile in profiles])

     prompt = [
         {
@@ -358,7 +358,7 @@ def write_proposal_debug(profiles: List[Profile], ref_contents: List[str], confi
                 'Discuss any barriers that have prevented this problem from being solved until now.\n'
                 'Explain how your approach differs from or improves upon prior work. MAKE IT CLEAR.\n\n'
                 '[Question 5] - What are the key components of my approach and results?\n\n'
-                'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use.\n'
+                'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use. But you must include these in one paragraph and not use subtitles.\n'
                 'Describe the expected outcomes. MAKE IT CLEAR.\n\n'
                 f'Contents collect from cited papers:\n{ref_strs}\n\n'
                 'Please brainstorm a following proposal with the given format.'
@@ -390,13 +390,12 @@ def write_proposal_debug(profiles: List[Profile], ref_contents: List[str], confi
                 'Discuss any barriers that have prevented this problem from being solved until now.\n'
                 'Explain how your approach differs from or improves upon prior work. MAKE IT CLEAR.\n\n'
                 '[Question 5] - What are the key components of my approach and results?\n\n'
-                'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use.\n'
+                'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use. But you must include these in one paragraph and not use subtitles.\n'
                 'Describe the expected outcomes. MAKE IT CLEAR.\n\n'
-                f'Contents collect from cited papers:\n{ref_strs}\n\n'
                 'This is the generated [Question 1] to [Question 4] based on the citation papers.\n'
                 f'{generated_4q}\n\n'
-                'You are a researcher who the bio is as follows:\n'
-                f'{profile.bio}\n\n'
+                'You have a group of researchers who the bio is as follows:\n'
+                f'{profile_str}\n\n'
                 'When you are generating [Question 5], you can think how to do the thing based on your bio information.\n'
                 'Please brainstorm a following proposal with the given format. You should still start with [Question 1] to [Question 5]. But the content from [Question 1] to [Question 4] is already given and you just copy them as part of the output.'
             ),
         }
     ]
     response = model_prompting(config.param.base_llm, prompt, max_token_num=config.param.max_token_num)[0]
     return response

+import random
+from typing import List
+from voyageai import Client
+
+def fuse_questions(context: str, question_5_candidates: List[str], config: Config) -> str:
+    """
+    Fuse multiple [Question 5] candidates into a single, coherent [Question 5].
+
+    Args:
+        context (str): The context containing [Question 1] to [Question 4].
+        question_5_candidates (List[str]): List of candidate [Question 5] responses.
+        config (Config): Configuration object with LLM parameters.
+
+    Returns:
+        str: A fused and coherent [Question 5].
+    """
+    prompt = [
+        {
+            'role': 'user',
+            'content': (
+                f"Here are the first four questions from a research proposal:\n\n"
+                f"{context}\n\n"
+                f"Below are multiple versions of [Question 5] generated by different researchers:\n\n"
+                + "\n\n".join([f"Version {i+1}:\n{q}" for i, q in enumerate(question_5_candidates)])
+                + "\n\n"
+                f"Your task is to fuse these [Question 5] versions into a single, clear, and coherent [Question 5].\n"
+                f"Consider the following when fusing:\n"
+                f"1. Relevance: Align the fused [Question 5] with [Question 1] to [Question 4].\n"
+                f"2. Clarity: Ensure the fused [Question 5] is well-written and easy to understand.\n"
+                f"3. Completeness: Ensure the fused [Question 5] covers all key elements of the proposed methodology and outcomes.\n\n"
+                f"Output only the fused [Question 5]."
+            )
+        }
+    ]
+
+    # Use the LLM to generate the fused [Question 5]
+    fused_response = model_prompting(config.param.base_llm, prompt, max_token_num=config.param.max_token_num)
+    return fused_response[0].strip()
+
+
+def write_proposal_fake_researchtown(
+    profiles: List[Profile],
+    ref_contents: List[str],
+    config: Config,
+) -> str:
+    random.seed(0)
+    random.shuffle(ref_contents)
+    # Initialize Voyage AI client
+    voyage_client = Client(api_key="pa-<redacted>")
+
+    # Rerank references
+    def rerank_references(query: str, refs: List[str], top_k: int = 5):
+        results = voyage_client.rerank(query, refs, model="rerank-2", top_k=top_k)
+        return [result.document for result in results.results]
+
+    # Generate the first set of questions [Question 1] to [Question 4]
+    ref_strs = '\n'.join([f'paper {idx + 1}. {ref}' for idx, ref in enumerate(ref_contents) if ref])
+    prompt = [
+        {
+            'role': 'user',
+            'content': (
+                f"Here is a high-level summarized insight of a research field Machine Learning.\n\n"
+                f"Here are the five core questions:\n\n"
+                f"[Question 1] - What is the problem?\n\n"
+                f"Formulate the specific research question you aim to address. Only output one question and do not include any more information.\n\n"
+                f"[Question 2] - Why is it interesting and important?\n\n"
+                f"Explain the broader implications of solving this problem for the research community.\n"
+                f"Discuss how such paper will affect the future research.\n"
+                f"Discuss how addressing this question could advance knowledge or lead to practical applications.\n\n"
+                f"[Question 3] - Why is it hard?\n\n"
+                f"Discuss the challenges and complexities involved in solving this problem.\n"
+                f"Explain why naive or straightforward approaches may fail.\n"
+                f"Identify any technical, theoretical, or practical obstacles that need to be overcome. MAKE IT CLEAR.\n\n"
+                f"[Question 4] - Why hasn't it been solved before?\n\n"
+                f"Identify gaps or limitations in previous research or existing solutions.\n"
+                f"Discuss any barriers that have prevented this problem from being solved until now.\n"
+                f"Explain how your approach differs from or improves upon prior work. MAKE IT CLEAR.\n\n"
+                f"Contents collect from cited papers:\n{ref_strs}\n\n"
+                f"Please brainstorm a following proposal with the given format."
+            ),
+        }
+    ]
+    generated_5q = model_prompting(config.param.base_llm, prompt, max_token_num=config.param.max_token_num)[0]
+    generated_4q = generated_5q.split('[Question 5]')[0]
+
+    question_5_candidates = []
+
+    profiles = profiles[:1] + profiles[-1:]
+    # Generate [Question 5] for each bio
+    for profile in profiles:
+        # Rerank references for the current profile
+        ref_contents = [ref for ref in ref_contents if ref is not None]
+        print(len(ref_contents))
+        if ref_contents == []:
+            top_refs = []
+            ref_strs = ''
+        else:
+            top_refs = rerank_references(profile.bio, ref_contents, top_k=5)
+            ref_strs = '\n'.join([f'paper {idx + 1}. {ref}' for idx, ref in enumerate(top_refs)])
+
+        # Generate prompt for [Question 5]
+        prompt = [
+            {
+                'role': 'user',
+                'content': (
+                    f"Here is a high-level summarized insight of a research field Machine Learning.\n\n"
+                    f"Here are the five core questions:\n\n"
+                    f"[Question 1] - What is the problem?\n\n"
+                    f"Formulate the specific research question you aim to address. Only output one question and do not include any more information.\n\n"
+                    f"[Question 2] - Why is it interesting and important?\n\n"
+                    f"Explain the broader implications of solving this problem for the research community.\n"
+                    f"Discuss how such paper will affect the future research.\n"
+                    f"Discuss how addressing this question could advance knowledge or lead to practical applications.\n\n"
+                    f"[Question 3] - Why is it hard?\n\n"
+                    f"Discuss the challenges and complexities involved in solving this problem.\n"
+                    f"Explain why naive or straightforward approaches may fail.\n"
+                    f"Identify any technical, theoretical, or practical obstacles that need to be overcome. MAKE IT CLEAR.\n\n"
+                    f"[Question 4] - Why hasn't it been solved before?\n\n"
+                    f"Identify gaps or limitations in previous research or existing solutions.\n"
+                    f"Discuss any barriers that have prevented this problem from being solved until now.\n"
+                    f"Explain how your approach differs from or improves upon prior work. MAKE IT CLEAR.\n\n"
+                    f"[Question 5] - What are the key components of my approach and results?\n\n"
+                    f"Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use. But you must include these in one paragraph and not use subtitles.\n"
+                    f"Describe the expected outcomes. MAKE IT CLEAR.\n\n"
+                    f"This is the generated [Question 1] to [Question 4] based on the citation papers.\n"
+                    f"{generated_4q}\n\n"
+                    f"You have a group of researchers who the bio is as follows:\n"
+                    f"{profile.bio}\n\n"
+                    f"Contents collected from top reranked papers:\n{ref_strs}\n\n"
+                    f"Please brainstorm a following proposal with the given format."
+                ),
+            }
+        ]
+        question_5_response = model_prompting(config.param.base_llm, prompt, max_token_num=config.param.max_token_num)[0]
+        question_5 = question_5_response.split('[Question 5]')[1]
+        question_5_candidates.append(question_5)
+
+    # Fuse all [Question 5] candidates into a single response
+    fused_question_5 = fuse_questions(generated_4q, question_5_candidates, config)
+
+    # Combine with [Question 1]-[Question 4]
+    final_5q = f"{generated_4q}{fused_question_5}"
+
+    return final_5q
+
+
 def write_proposal(
     mode: str,
     profiles: List[Profile],
     ref_contents: List[str],
     config: Config,
@@ -437,5 +582,9 @@ def write_proposal(
         return write_proposal_debug(
             profiles=profiles, ref_contents=ref_contents, config=config,
         )
+    elif mode == 'fake_research_town':
+        return write_proposal_fake_researchtown(
+            profiles=profiles, ref_contents=ref_contents, config=config
+        )
     else:
         raise ValueError(f'Invalid proposal writing mode: {mode}')

diff --git a/research_bench/run_eval.py b/research_bench/run_eval.py
index d50ea8cd..e060221b 100644
--- a/research_bench/run_eval.py
+++ b/research_bench/run_eval.py
@@ -82,7 +82,8 @@ def main() -> None:
             'author_citation',
             'research_town',
             'sakana_ai_scientist',
-            'debug'
+            'debug',
+            'fake_research_town',
         ],
         help='Processing mode',
     )

diff --git a/research_bench/run_eval.sh b/research_bench/run_eval.sh
index 68513e9f..093ec410 100755
--- a/research_bench/run_eval.sh
+++ b/research_bench/run_eval.sh
@@ -3,7 +3,7 @@
 # Define the input and output paths, along with the modes to test
 INPUT_PATH="./mlbench/mlbench_full.json"
 OUTPUT_DIR="./results"
-MODES=("debug")
+MODES=("fake_research_town")
 NUM_PROCESSES=4

 # Loop through each mode and run the evaluation
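Note on PATCH 08: `write_proposal_fake_researchtown` also commits a literal Voyage AI key (redacted above), which should likewise be rotated. Stripped of the prompt strings, the function is a three-stage pipeline: draft [Question 1] to [Question 4] once from the shuffled references, have each selected author draft a [Question 5] grounded in the references that Voyage `rerank-2` ranks highest against that author's bio, then merge the candidates with `fuse_questions`. Below is a schematic of that control flow; the callables are placeholders for the patch's prompt-driven steps, not real APIs.

```python
from typing import Callable, List

def per_author_then_fuse(
    bios: List[str],
    refs: List[str],
    rerank: Callable[[str, List[str]], List[str]],  # bio -> top-k references
    draft_q5: Callable[[str, List[str]], str],      # one [Question 5] per bio
    fuse: Callable[[List[str]], str],               # fuse_questions equivalent
) -> str:
    # Each author drafts a methodology grounded in the references most
    # relevant to their own bio; a final pass merges the candidates.
    candidates = [draft_q5(bio, rerank(bio, refs)) for bio in bios]
    return fuse(candidates)
```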
From feb4675d57a5ec656619225fbaa00b5a17a82a1c Mon Sep 17 00:00:00 2001
From: Haofei Yu <1125027232@qq.com>
Date: Wed, 27 Nov 2024 13:59:20 -0600
Subject: [PATCH 09/11] update

---
 research_bench/eval_only.py        | 2 +-
 research_bench/proposal_writing.py | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/research_bench/eval_only.py b/research_bench/eval_only.py
index d83b25a6..281e364d 100644
--- a/research_bench/eval_only.py
+++ b/research_bench/eval_only.py
@@ -28,7 +28,7 @@
     'voyageai_sim_q5': [],
 }

-dataset = dataset[:100]
+dataset = dataset[:70]
 for data in tqdm(dataset):
     ref_proposal = data['ref_proposal']
     gen_proposal = data['gen_proposal']

diff --git a/research_bench/proposal_writing.py b/research_bench/proposal_writing.py
index 96e88e29..346f5734 100644
--- a/research_bench/proposal_writing.py
+++ b/research_bench/proposal_writing.py
@@ -496,7 +496,6 @@ def rerank_references(query: str, refs: List[str], top_k: int = 5):
     for profile in profiles:
         # Rerank references for the current profile
         ref_contents = [ref for ref in ref_contents if ref is not None]
-        print(len(ref_contents))
         if ref_contents == []:
             top_refs = []
             ref_strs = ''
@@ -530,7 +529,7 @@ def rerank_references(query: str, refs: List[str], top_k: int = 5):
                     f"Describe the expected outcomes. MAKE IT CLEAR.\n\n"
                     f"This is the generated [Question 1] to [Question 4] based on the citation papers.\n"
                     f"{generated_4q}\n\n"
-                    f"You have a group of researchers who the bio is as follows:\n"
+                    f"You have a researcher who the bio is as follows:\n"
                     f"{profile.bio}\n\n"
                     f"Contents collected from top reranked papers:\n{ref_strs}\n\n"
                     f"Please brainstorm a following proposal with the given format."
                 ),
             }
         ]
@@ -545,7 +544,7 @@ def rerank_references(query: str, refs: List[str], top_k: int = 5):
     fused_question_5 = fuse_questions(generated_4q, question_5_candidates, config)

     # Combine with [Question 1]-[Question 4]
-    final_5q = f"{generated_4q}{fused_question_5}"
+    final_5q = f"{generated_4q}\n\n{fused_question_5}"

     return final_5q

From 05f7b2c239e2f00905661e85701cba68f308f877 Mon Sep 17 00:00:00 2001
From: Haofei Yu <1125027232@qq.com>
Date: Wed, 27 Nov 2024 15:16:06 -0600
Subject: [PATCH 10/11] update

---
 research_bench/eval_only.py        | 4 ++--
 research_bench/proposal_writing.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/research_bench/eval_only.py b/research_bench/eval_only.py
index 281e364d..eed74a94 100644
--- a/research_bench/eval_only.py
+++ b/research_bench/eval_only.py
@@ -4,7 +4,7 @@
 import json

 dataset = []
-with open('./results/mlbench_result_4o_mini_citation_only.jsonl', 'r') as f:
+with open('./results/mlbench_result_4o_mini_fake_research_town.jsonl', 'r') as f:
     for line_num, line in enumerate(f, 1):
         try:
             obj = json.loads(line)
@@ -28,7 +28,7 @@
     'voyageai_sim_q5': [],
 }

-dataset = dataset[:70]
+dataset = dataset[:100]
 for data in tqdm(dataset):
     ref_proposal = data['ref_proposal']
     gen_proposal = data['gen_proposal']

diff --git a/research_bench/proposal_writing.py b/research_bench/proposal_writing.py
index 346f5734..79095cff 100644
--- a/research_bench/proposal_writing.py
+++ b/research_bench/proposal_writing.py
@@ -491,7 +491,7 @@ def rerank_references(query: str, refs: List[str], top_k: int = 5):
     question_5_candidates = []

-    profiles = profiles[:1] + profiles[-1:]
+    profiles = profiles[:1]
     # Generate [Question 5] for each bio
     for profile in profiles:
From 11e9707b01d4d0ccf77f99040d33d75206c75902 Mon Sep 17 00:00:00 2001
From: Haofei Yu <1125027232@qq.com>
Date: Wed, 27 Nov 2024 17:16:39 -0600
Subject: [PATCH 11/11] update

---
 research_bench/eval_only.py        | 12 ++++++++++--
 research_bench/proposal_writing.py |  4 ++--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/research_bench/eval_only.py b/research_bench/eval_only.py
index eed74a94..71ea16b0 100644
--- a/research_bench/eval_only.py
+++ b/research_bench/eval_only.py
@@ -4,7 +4,7 @@
 import json

 dataset = []
-with open('./results/mlbench_result_4o_mini_fake_research_town.jsonl', 'r') as f:
+with open('./results/mlbench_result_4o_mini_fake_research_town_first_author_only.jsonl', 'r') as f:
     for line_num, line in enumerate(f, 1):
         try:
             obj = json.loads(line)
@@ -39,5 +39,13 @@
     for key in overall_metrics.keys():
         overall_metrics[key].append(data[key])

+final_metrics = {}
 for key, values in overall_metrics.items():
-    print(f'{key}: {sum(values) / len(values)}')
\ No newline at end of file
+    print(f'{key}: {sum(values) / len(values)}')
+    final_metrics[key] = sum(values) / len(values)
+
+
+openai_metric = 0.1 * final_metrics['openai_sim_q1'] + 0.1 * final_metrics['openai_sim_q2'] + 0.1 * final_metrics['openai_sim_q3'] + 0.1 * final_metrics['openai_sim_q4'] + 0.6 * final_metrics['openai_sim_q5']
+voyageai_metric = 0.1 * final_metrics['voyageai_sim_q1'] + 0.1 * final_metrics['voyageai_sim_q2'] + 0.1 * final_metrics['voyageai_sim_q3'] + 0.1 * final_metrics['voyageai_sim_q4'] + 0.6 * final_metrics['voyageai_sim_q5']
+print(f'openai_metric: {openai_metric}')
+print(f'voyageai_metric: {voyageai_metric}')
\ No newline at end of file

diff --git a/research_bench/proposal_writing.py b/research_bench/proposal_writing.py
index 79095cff..8e2fd4e5 100644
--- a/research_bench/proposal_writing.py
+++ b/research_bench/proposal_writing.py
@@ -83,7 +83,7 @@ def write_proposal_zero_shot(config: Config) -> str:
                 'Discuss any barriers that have prevented this problem from being solved until now.\n'
                 'Explain how your approach differs from or improves upon prior work. MAKE IT CLEAR.\n\n'
                 '[Question 5] - What are the key components of my approach and results?\n\n'
-                'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use.\n'
+                'Outline your proposed methodology in detail, including the method, dataset, metric that you plan to use. But you must include these in one paragraph and not use subtitles.\n'
                 'Describe the expected outcomes. MAKE IT CLEAR.\n\n'
                 'Please provide the five core questions contents for a brand new future research that you think are the most promising one.'
             ),
@@ -491,7 +491,7 @@ def rerank_references(query: str, refs: List[str], top_k: int = 5):
     question_5_candidates = []

-    profiles = profiles[:1]
+    #profiles = profiles[:1]
     # Generate [Question 5] for each bio
     for profile in profiles:
         # Rerank references for the current profile
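Note on PATCH 11: the final aggregate weights the per-question similarities 0.1/0.1/0.1/0.1/0.6, so [Question 5] dominates the score. The hand-unrolled sums can be expressed once over a weights table, which keeps the OpenAI and Voyage variants in sync; a sketch using the weights from the patch follows.

```python
from typing import Dict

# Weights from the patch: q1-q4 count 0.1 each, q5 carries 0.6.
WEIGHTS: Dict[str, float] = {'q1': 0.1, 'q2': 0.1, 'q3': 0.1, 'q4': 0.1, 'q5': 0.6}

def weighted_metric(final_metrics: Dict[str, float], prefix: str) -> float:
    # prefix is 'openai_sim' or 'voyageai_sim'; keys look like 'openai_sim_q5'.
    return sum(w * final_metrics[f'{prefix}_{q}'] for q, w in WEIGHTS.items())

# weighted_metric(final_metrics, 'openai_sim') reproduces openai_metric above.
```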