Skip to content

Commit 5dd23f1

Browse files
feat: 1. support a smarter way to detect critical errors;
fix: 1. increase the number of executable steps per case;
1 parent a734ae2 commit 5dd23f1

File tree

1 file changed

+6
-13
lines changed

1 file changed

+6
-13
lines changed

webqa_agent/testers/case_gen/agents/execute_agent.py

Lines changed: 6 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
from webqa_agent.testers.case_gen.utils.message_converter import convert_intermediate_steps_to_messages
2222
from webqa_agent.utils.log_icon import icon
2323

24-
LONG_STEPS = 25
24+
LONG_STEPS = 30
2525

2626
# ============================================================================
2727
# Critical Failure Detection Patterns
@@ -739,24 +739,17 @@ def extract_path(u):
739739
logging.debug(f"Step {i+1} tool output: {tool_output}")
740740
messages.append(AIMessage(content=tool_output))
741741

742-
# Check for failures in the tool output
743-
if "[failure]" in result['intermediate_steps'][0][1].lower() or "failed" in tool_output.lower():
744-
failed_steps.append(i + 1)
745-
logging.warning(f"Step {i+1} detected as failed based on output")
746-
747742
# Check for critical failures that should immediately stop execution
748743
if _is_critical_failure_step(tool_output, instruction_to_execute):
749744
failed_steps.append(i + 1)
750-
final_summary = f"FINAL_SUMMARY: Critical failure at step {i+1}: '{instruction_to_execute}'. Error details: {tool_output[:200]}..."
751-
logging.error(f"Critical failure detected at step {i+1}, aborting remaining steps to save time")
745+
final_summary = f"FINAL_SUMMARY: Critical failure at step {i + 1}: '{instruction_to_execute}'. Error details: {tool_output[:200]}..."
746+
logging.error(f"Critical failure detected at step {i + 1}, aborting remaining steps to save time")
752747
break
753748

754-
# Check for max iterations, which indicates a failure to complete the step.
755-
if "Agent stopped due to max iterations." in tool_output:
749+
# Check for failures in the tool output
750+
if "[failure]" in result['intermediate_steps'][0][1].lower() or "failed" in tool_output.lower():
756751
failed_steps.append(i + 1)
757-
final_summary = f"FINAL_SUMMARY: Step '{instruction_to_execute}' failed after multiple retries. The agent could not complete the instruction. Last output: {tool_output}"
758-
logging.error(f"Step {i+1} failed due to max iterations.")
759-
break
752+
logging.warning(f"Step {i+1} detected as failed based on output")
760753

761754
# Check for objective achievement signal
762755
is_achieved, achievement_reason = _is_objective_achieved(tool_output)

0 commit comments

Comments
 (0)