
Commit 9aa1e97

yayashuxue and claude committed
refactor: simplify OpenAI engine parameter handling and fix HLE output directory
- Simplified unsupported parameter handling in OpenAIEngine from 210 to 132 lines
- Removed complex parse_openai_error_for_unsupported_param function and duplicate code
- Extracted common logic into single _fix_unsupported_param helper method
- Fixed HLE evaluation script to always output to examples/deepresearch/hle_outputs/
- Ensures outputs go to gitignored location regardless of where script is run

This addresses reviewer feedback about overly complex error handling with code duplication. Tested with GPT-4o and O3-mini models.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent 0ec7b65 commit 9aa1e97
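
To make the intent concrete, here is a minimal usage sketch of the simplified engine. The import path is assumed from this commit's file layout, and the model name and sampling parameters are illustrative, not part of the commit:

import asyncio

from rllm.engine.rollout.openai_engine import OpenAIEngine  # path assumed from the repo layout

async def main():
    # Reasoning models such as o3-mini reject `max_tokens` and non-default
    # `temperature`; after this commit both are corrected in-flight on the
    # first BadRequestError and the request is retried.
    # Requires OPENAI_API_KEY in the environment (the constructor's default api_key).
    engine = OpenAIEngine(model="o3-mini", sampling_params={"max_tokens": 1024, "temperature": 0.6})
    output = await engine.chat_completion([{"role": "user", "content": "Say hello."}])
    print(output)

asyncio.run(main())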

File tree

2 files changed: +26 -103 lines changed

examples/deepresearch/evaluate_hle.py

Lines changed: 5 additions & 1 deletion
@@ -518,8 +518,12 @@ async def main():
     parser.add_argument(
         "--parallel-tasks", type=int, default=4, help="Number of parallel tasks"
     )
+    # Default output directory relative to script location
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    default_output_dir = os.path.join(script_dir, "hle_outputs")
+
     parser.add_argument(
-        "--output-dir", default="./hle_outputs", help="Output directory for results"
+        "--output-dir", default=default_output_dir, help="Output directory for results"
     )
 
     args = parser.parse_args()
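
A brief standalone sketch of the path logic above, showing why the script-relative default matters (the repo-root scenario is illustrative):

import os

# The old default "./hle_outputs" resolved against the current working
# directory: running the script from the repo root wrote results to
# <repo>/hle_outputs rather than the gitignored location.
# Anchoring on __file__ pins the default to the script's own directory:
script_dir = os.path.dirname(os.path.abspath(__file__))       # .../examples/deepresearch
default_output_dir = os.path.join(script_dir, "hle_outputs")  # .../examples/deepresearch/hle_outputs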

rllm/engine/rollout/openai_engine.py

Lines changed: 21 additions & 102 deletions
@@ -9,45 +9,11 @@
 from rllm.parser import ChatTemplateParser, ToolParser
 
 
-def parse_openai_error_for_unsupported_param(error_message: str) -> tuple[str | None, str | None]:
-    """
-    Parse OpenAI API error to extract unsupported parameter and suggested replacement.
-
-    Returns: (unsupported_param, suggested_param) or (None, None) if not parseable
-
-    Example errors:
-    - "Unsupported parameter: 'max_tokens' is not supported with this model. Use 'max_completion_tokens' instead."
-    - "Unsupported value: 'temperature' does not support 0.6 with this model. Only the default (1) value is supported."
-    """
-    if "unsupported parameter" in error_message.lower():
-        # Extract parameter name from quotes
-        import re
-
-        match = re.search(r"'([^']+)'\s+is not supported", error_message, re.IGNORECASE)
-        if match:
-            unsupported = match.group(1)
-            # Check for suggested replacement
-            suggest_match = re.search(r"use\s+'([^']+)'\s+instead", error_message, re.IGNORECASE)
-            suggested = suggest_match.group(1) if suggest_match else None
-            return unsupported, suggested
-
-    if "unsupported value" in error_message.lower():
-        # Parameter exists but value not allowed - remove the param entirely
-        import re
-
-        match = re.search(r"'([^']+)'\s+does not support", error_message, re.IGNORECASE)
-        if match:
-            return match.group(1), None
-
-    return None, None
-
-
 class OpenAIEngine(RolloutEngine):
     def __init__(self, model: str, tokenizer=None, api_retries: int = 3, base_url: str = "https://api.openai.com/v1", api_key: str = os.getenv("OPENAI_API_KEY"), sampling_params: dict | None = None, **kwargs):
         self.model = model
         self.api_retries = api_retries
         self.sampling_params = sampling_params or {}
-        self._param_fixes_logged = set()  # Track which param fixes we've already logged
 
         self.tokenizer = tokenizer
         if self.tokenizer is not None:
@@ -65,14 +31,29 @@ def __init__(self, model: str, tokenizer=None, api_retries: int = 3, base_url: s
         self.client = openai.AsyncOpenAI(base_url=base_url, api_key=api_key)
         logging.getLogger("httpx").setLevel(logging.WARNING)
 
+    def _fix_unsupported_param(self, error_msg: str, sampling_params: dict) -> bool:
+        """Fix unsupported parameters based on error message. Returns True if fixed."""
+
+        # Try to extract unsupported parameter from error
+        if "max_tokens" in error_msg and "max_completion_tokens" in error_msg:
+            if "max_tokens" in sampling_params:
+                sampling_params["max_completion_tokens"] = sampling_params.pop("max_tokens")
+                return True
+
+        # Remove any unsupported parameter mentioned in error
+        for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty"]:
+            if param in error_msg.lower() and "not support" in error_msg.lower():
+                sampling_params.pop(param, None)
+                return True
+
+        return False
+
     async def chat_completion(self, messages: list[dict], **kwargs) -> ModelOutput:
         sampling_params = self.sampling_params.copy()
         sampling_params.update(kwargs)
         sampling_params.pop("model", None)
 
         retries = self.api_retries
-        param_retry_budget = 10  # Allow up to 10 parameter fixes (reasoning models can reject many params)
-
         while retries > 0:
             try:
                 response = await self.client.chat.completions.create(model=self.model, messages=messages, timeout=3600, **sampling_params)
@@ -87,39 +68,9 @@ async def chat_completion(self, messages: list[dict], **kwargs) -> ModelOutput:
                 print("Sleep for 5 seconds for API limit.")
                 await asyncio.sleep(5)
             except openai.BadRequestError as e:
-                # Try to auto-fix unsupported parameters
                 error_msg = str(e)
-                unsupported_param, suggested_param = parse_openai_error_for_unsupported_param(error_msg)
-
-                if unsupported_param and param_retry_budget > 0:
-                    param_retry_budget -= 1
-
-                    # Only log this fix once per engine instance
-                    log_key = f"{unsupported_param}->{suggested_param}" if suggested_param else f"remove:{unsupported_param}"
-                    should_log = log_key not in self._param_fixes_logged
-                    if should_log:
-                        self._param_fixes_logged.add(log_key)
-                        print(f"⚠️ Model {self.model} doesn't support '{unsupported_param}', adjusting parameters...")
-
-                    if suggested_param:
-                        # Remap parameter (e.g., max_tokens -> max_completion_tokens)
-                        if unsupported_param in sampling_params:
-                            value = sampling_params.pop(unsupported_param)
-                            if suggested_param not in sampling_params:
-                                sampling_params[suggested_param] = value
-                            if should_log:
-                                print(f" Remapped '{unsupported_param}' -> '{suggested_param}'")
-                    else:
-                        # Just remove the unsupported parameter
-                        if unsupported_param in sampling_params:
-                            sampling_params.pop(unsupported_param)
-                            if should_log:
-                                print(f" Removed '{unsupported_param}'")
-
-                    # Retry immediately with fixed params (don't decrement retries)
-                    continue
-
-                # Can't auto-fix or out of param retry budget
+                if self._fix_unsupported_param(error_msg, sampling_params):
+                    continue  # Retry with fixed params
                 retries -= 1
                 if retries == 0:
                     raise Exception(f"Error processing content after retries: {e}") from e
@@ -138,8 +89,6 @@ async def completion(self, prompt: str, **kwargs) -> ModelOutput:
         sampling_params.pop("model", None)
 
         retries = self.api_retries
-        param_retry_budget = 10  # Allow up to 10 parameter fixes (reasoning models can reject many params)
-
        while retries > 0:
             try:
                 response = await self.client.completions.create(model=self.model, prompt=prompt, timeout=3600, **sampling_params)
@@ -151,39 +100,9 @@ async def completion(self, prompt: str, **kwargs) -> ModelOutput:
                 print("Sleep for 5 seconds for API limit.")
                 await asyncio.sleep(5)
             except openai.BadRequestError as e:
-                # Try to auto-fix unsupported parameters
                 error_msg = str(e)
-                unsupported_param, suggested_param = parse_openai_error_for_unsupported_param(error_msg)
-
-                if unsupported_param and param_retry_budget > 0:
-                    param_retry_budget -= 1
-
-                    # Only log this fix once per engine instance
-                    log_key = f"{unsupported_param}->{suggested_param}" if suggested_param else f"remove:{unsupported_param}"
-                    should_log = log_key not in self._param_fixes_logged
-                    if should_log:
-                        self._param_fixes_logged.add(log_key)
-                        print(f"⚠️ Model {self.model} doesn't support '{unsupported_param}', adjusting parameters...")
-
-                    if suggested_param:
-                        # Remap parameter (e.g., max_tokens -> max_completion_tokens)
-                        if unsupported_param in sampling_params:
-                            value = sampling_params.pop(unsupported_param)
-                            if suggested_param not in sampling_params:
-                                sampling_params[suggested_param] = value
-                            if should_log:
-                                print(f" Remapped '{unsupported_param}' -> '{suggested_param}'")
-                    else:
-                        # Just remove the unsupported parameter
-                        if unsupported_param in sampling_params:
-                            sampling_params.pop(unsupported_param)
-                            if should_log:
-                                print(f" Removed '{unsupported_param}'")
-
-                    # Retry immediately with fixed params (don't decrement retries)
-                    continue
-
-                # Can't auto-fix or out of param retry budget
+                if self._fix_unsupported_param(error_msg, sampling_params):
+                    continue  # Retry with fixed params
                 retries -= 1
                 if retries == 0:
                     raise Exception(f"Error processing content after retries: {e}") from e
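
To see the consolidated logic in isolation, here is a runnable sketch of the new helper, exercised with the two example error strings from the removed docstring. The body mirrors the diff above; it is written as a plain function (no self) purely for illustration:

def _fix_unsupported_param(error_msg: str, sampling_params: dict) -> bool:
    """Fix unsupported parameters based on error message. Returns True if fixed."""
    # Remap max_tokens -> max_completion_tokens when the error suggests it
    if "max_tokens" in error_msg and "max_completion_tokens" in error_msg:
        if "max_tokens" in sampling_params:
            sampling_params["max_completion_tokens"] = sampling_params.pop("max_tokens")
            return True
    # Drop a sampling parameter the model rejects outright
    for param in ["temperature", "top_p", "presence_penalty", "frequency_penalty"]:
        if param in error_msg.lower() and "not support" in error_msg.lower():
            sampling_params.pop(param, None)
            return True
    return False

params = {"max_tokens": 1024, "temperature": 0.6}
_fix_unsupported_param("Unsupported parameter: 'max_tokens' is not supported with this model. Use 'max_completion_tokens' instead.", params)
_fix_unsupported_param("Unsupported value: 'temperature' does not support 0.6 with this model. Only the default (1) value is supported.", params)
print(params)  # {'max_completion_tokens': 1024}

Each fix mutates sampling_params before the retry, so a given error can fire at most once per parameter; this is why the separate param_retry_budget the old code tracked is no longer needed.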
