Skip to content

Commit 1c726c2

Browse files
[PB] Rollout fixes - use responses API for o3/o4 and preserve log files (#25)
1 parent: 74e1d9c — commit: 1c726c2

File tree

3 files changed

+6
-2
lines changed

3 files changed

+6
-2
lines changed

project/paperbench/paperbench/agents/aisi-basic-agent/_basic_agent_iterative.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -189,6 +189,8 @@ async def solve(state: TaskState, generate: Generate) -> TaskState:
189189
model = get_model()
190190
setattr(model, "total_retry_time", 0)
191191
setattr(model, "generate", generate_patched)
192+
if "o3" in model.api.model_name or "o4" in model.api.model_name:
193+
model.api.responses_api = True
192194

193195
# main loop (state.completed checks message_limit and token_limit)
194196
while not state.completed:

project/paperbench/paperbench/agents/aisi-basic-agent/_basic_agent_plus.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,8 @@ async def solve(state: TaskState, generate: Generate) -> TaskState:
195195
model = get_model()
196196
setattr(model, "total_retry_time", 0)
197197
setattr(model, "generate", generate_patched)
198+
if "o3" in model.api.model_name or "o4" in model.api.model_name:
199+
model.api.responses_api = True
198200

199201
# main loop (state.completed checks message_limit and token_limit)
200202
while not state.completed:
@@ -341,4 +343,4 @@ async def solve(state: TaskState, generate: Generate) -> TaskState:
341343
end_task_tool(),
342344
basic_agent_loop(),
343345
]
344-
)
346+
)

project/paperbench/paperbench/infra/alcatraz.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ async def populate_exclude_list(
2727
cmds = [
2828
f"MAX_SIZE={max_size}",
2929
f"EXCLUDE_LIST={exclude_list_path}",
30-
f"find {dir_path_on_computer} -type f -size +$MAX_SIZE -printf '%P\\n' > $EXCLUDE_LIST",
30+
f"find {dir_path_on_computer} -type f -not -name 'agent.log' -not -name 'inspect.log' -size +$MAX_SIZE -printf '%P\\n' > $EXCLUDE_LIST",
3131
"cat $EXCLUDE_LIST",
3232
]
3333
excluded = await computer.check_shell_command(" && ".join(cmds))

0 commit comments

Comments
 (0)