Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 18 additions & 2 deletions lm_eval/models/openai_completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,15 @@ def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> List[str]:
for out in outputs:
tmp = [None] * len(out["choices"])
for choices in out["choices"]:
tmp[choices["index"]] = choices["text"]
x = choices["text"]
content = x if x is not None else ""
if not content:
eval_logger.warning(
f"Received empty response for choice {choices['index']}. "
"This can happen when using reasoning models if the model spends the entire token budget on reasoning. "
"Consider increasing the number of allowed tokens."
)
tmp[choices["index"]] = content
res = res + tmp
return res

Expand Down Expand Up @@ -167,7 +175,15 @@ def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> List[str]:
for out in outputs:
tmp = [None] * len(out["choices"])
for choices in out["choices"]:
tmp[choices["index"]] = choices["message"]["content"]
x = choices["message"]["content"]
content = x if x is not None else ""
if not content:
eval_logger.warning(
f"Received empty response for choice {choices['index']}. "
"This can happen when using reasoning models if the model spends the entire token budget on reasoning. "
"Consider increasing the number of allowed tokens."
)
tmp[choices["index"]] = content
res = res + tmp
return res

Expand Down
24 changes: 24 additions & 0 deletions tests/models/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pytest

from lm_eval.api.instance import Instance
from lm_eval.models.openai_completions import LocalCompletionsAPI


Expand Down Expand Up @@ -161,6 +162,29 @@ def test_model_tokenized_call_usage(
assert result == {"result": "success"}


def test_generate_until_with_null_message_content(api):
    """A completion choice with ``"text": None`` must be parsed as an empty
    string (the parser warns and substitutes ``""``) instead of propagating
    ``None`` or raising.

    Regression test for reasoning models that spend the whole token budget
    on reasoning and return a null content field.
    """
    with patch("requests.post") as mock_post:
        mock_response = MagicMock()
        # Simulate the API returning a single choice whose text is null.
        mock_response.json.return_value = {
            "choices": [
                {
                    "index": 0,
                    "text": None,
                }
            ]
        }
        mock_response.ok = True
        mock_post.return_value = mock_response
        request = Instance(
            request_type="generate_until",
            doc={},
            arguments=("Test prompt", {"max_gen_toks": 10}),
            idx=0,
        )

        result = api.generate_until([request])

        # The null content must be coerced to an empty string, not None.
        assert result == [""]


class DummyAsyncContextManager:
def __init__(self, result):
self.result = result
Expand Down
Loading