diff --git a/project/paperbench/paperbench/agents/aisi-basic-agent/_basic_agent_iterative.py b/project/paperbench/paperbench/agents/aisi-basic-agent/_basic_agent_iterative.py index c09db20..4fd5e1a 100644 --- a/project/paperbench/paperbench/agents/aisi-basic-agent/_basic_agent_iterative.py +++ b/project/paperbench/paperbench/agents/aisi-basic-agent/_basic_agent_iterative.py @@ -189,6 +189,8 @@ async def solve(state: TaskState, generate: Generate) -> TaskState: model = get_model() setattr(model, "total_retry_time", 0) setattr(model, "generate", generate_patched) + if "o3" in model.api.model_name or "o4" in model.api.model_name: + model.api.responses_api = True # main loop (state.completed checks message_limit and token_limit) while not state.completed: diff --git a/project/paperbench/paperbench/agents/aisi-basic-agent/_basic_agent_plus.py b/project/paperbench/paperbench/agents/aisi-basic-agent/_basic_agent_plus.py index 5ad75aa..edb3701 100644 --- a/project/paperbench/paperbench/agents/aisi-basic-agent/_basic_agent_plus.py +++ b/project/paperbench/paperbench/agents/aisi-basic-agent/_basic_agent_plus.py @@ -195,6 +195,8 @@ async def solve(state: TaskState, generate: Generate) -> TaskState: model = get_model() setattr(model, "total_retry_time", 0) setattr(model, "generate", generate_patched) + if "o3" in model.api.model_name or "o4" in model.api.model_name: + model.api.responses_api = True # main loop (state.completed checks message_limit and token_limit) while not state.completed: @@ -341,4 +343,4 @@ async def solve(state: TaskState, generate: Generate) -> TaskState: end_task_tool(), basic_agent_loop(), ] - ) + ) \ No newline at end of file diff --git a/project/paperbench/paperbench/infra/alcatraz.py b/project/paperbench/paperbench/infra/alcatraz.py index 1e12e8e..5e18b20 100644 --- a/project/paperbench/paperbench/infra/alcatraz.py +++ b/project/paperbench/paperbench/infra/alcatraz.py @@ -27,7 +27,7 @@ async def populate_exclude_list( cmds = [ f"MAX_SIZE={max_size}", f"EXCLUDE_LIST={exclude_list_path}", - f"find {dir_path_on_computer} -type f -size +$MAX_SIZE -printf '%P\\n' > $EXCLUDE_LIST", + f"find {dir_path_on_computer} -type f -not -name 'agent.log' -not -name 'inspect.log' -size +$MAX_SIZE -printf '%P\\n' > $EXCLUDE_LIST", "cat $EXCLUDE_LIST", ] excluded = await computer.check_shell_command(" && ".join(cmds))