Merge pull request #1517 from mito-ds/smartdebug-full-error

Smartdebug full error
mito-ds · Feb 11, 2025 · c31ed7d · c31ed7d
2 parents ad8648d + 90c88ac
commit c31ed7d
Show file tree

Hide file tree

Showing 22 changed files with 854 additions and 465 deletions.
diff --git a/evals/ai_api_calls/get_open_ai_completion.py b/evals/ai_api_calls/get_open_ai_completion.py
@@ -5,10 +5,9 @@
 def get_open_ai_completion(prompt: str, model: str) -> str:
     client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
 
-
     completion_function_params = {
         "model": model,
-        "stream": True,
+        "stream": False,
         "messages": [
             {"role": "system", "content": "You are an expert Python programmer."},
             {"role": "user", "content": prompt}

diff --git a/evals/prompts/smart_debug_prompts/__init__.py b/evals/prompts/smart_debug_prompts/__init__.py
@@ -1,8 +1,10 @@
 
 from evals.prompts.smart_debug_prompts.prod_prompt_v1 import prod_prompt_v1_generator
 from evals.prompts.smart_debug_prompts.prod_prompt_v2 import prod_prompt_v2_generator
-
+from evals.prompts.smart_debug_prompts.prod_prompt_v3 import prod_prompt_v3_generator
+
 SMART_DEBUG_PROMPT_GENERATORS = [
     prod_prompt_v1_generator,
-    prod_prompt_v2_generator
+    prod_prompt_v2_generator,
+    prod_prompt_v3_generator
 ]
diff --git a/evals/prompts/smart_debug_prompts/prod_prompt_v3.py b/evals/prompts/smart_debug_prompts/prod_prompt_v3.py
@@ -0,0 +1,155 @@
+
+from evals.eval_types import DebugPromptGenerator, NotebookState
+
+__all__ = ["prod_prompt_v3_generator"]
+
+class _ProdPromptV3Generator(DebugPromptGenerator):
+    """Debug prompt generator registered under the name "prod_prompt_v3".
+
+    The prompt it builds contains two worked examples, a list of guidelines,
+    and then the task itself (defined variables, active cell code, traceback).
+    """
+    prompt_name = "prod_prompt_v3"
+
+
+    def get_prompt(self, error_message: str, notebook_state: NotebookState) -> str:
+        """Build the debugging prompt for one failing cell.
+
+        Args:
+            error_message: Traceback text inserted under "Error Traceback:".
+            notebook_state: Supplies ``global_vars`` for the "Defined Variables"
+                section and ``cell_contents``, whose last entry is used as the
+                active cell's code (an empty string when there are no cells).
+
+        Returns:
+            The complete prompt string, ending with empty "ERROR ANALYSIS",
+            "INTENT ANALYSIS" and "SOLUTION" headings.
+        """
+
+        # The prompt is an f-string, so literal braces in the examples are
+        # written doubled; only the three single-brace expressions near the
+        # end (global_vars, last cell, error_message) are interpolated.
+        # NOTE(review): Example 1's intent analysis contains the typo "tring";
+        # it is left untouched here because the prompt text is runtime behavior.
+        return f"""You are debugging code in a JupyterLab 4 notebook. Analyze the error and provide a solution that maintains the original intent.
+
+<Example 1>
+Defined Variables:
+{{
+    'revenue_multiplier': 1.5,
+    'sales_df': pd.DataFrame({{
+        'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
+        'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
+        'units_sold': [1, 2, 1, 4, 5],
+        'total_price': [10, 19.98, 13.99, 84.00, 500]
+    }})
+}}
+
+Code in active cell:
+```python
+sales_df['total_revenue'] = sales_df['price'] * revenue_multiplier
+```
+
+Error Traceback:
+Cell In[24], line 9
+      2 revenue_multiplier =  1.5
+      3 sales_df = pd.DataFrame({{
+      4         'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
+      5         'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
+      6         'units_sold': [1, 2, 1, 4, 5],
+      7         'total_price': [10, 19.98, 13.99, 84.00, 500]
+      8 }})
+----> 9 sales_df['total_revenue'] = sales_df['price'] * revenue_multiplier
+
+KeyError: 'price'
+
+
+ERROR ANALYSIS:
+Runtime error: Attempted to access non-existent DataFrame column
+
+INTENT ANALYSIS:
+User is trying to calculate total revenue by applying a multiplier to transaction prices. Based on the defined variables, the column that the user is tring to access is likely `total_price` because that would allow them to calculate the total revenue for each transaction.
+
+SOLUTION:
+```python
+sales_df['total_revenue'] = sales_df['total_price'] * revenue_multiplier
+```
+
+The DataFrame contains 'total_price' rather than 'price'. Updated column reference to match existing data structure.
+</Example 1>
+
+<Example 2>
+Defined Variables:
+{{
+    'df': pd.DataFrame({{
+        'order_id': [1, 2, 3, 4],
+        'date': ['Mar 7, 2025', 'Sep 24, 2024', '25 June, 2024', 'June 29, 2024'],
+        'amount': [100, 150, 299, 99]
+    }})
+}}
+
+Code in active cell:
+```python
+df['date'] = pd.to_datetime(df['date'])
+```
+
+Error Traceback:
+Cell In[27], line 1
+----> 1 df['date'] = pd.to_datetime(df['date'])
+
+ValueError: time data "25 June, 2024" doesn't match format "%b %d, %Y", at position 2. You might want to try:
+    - passing `format` if your strings have a consistent format;
+    - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
+    - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.
+
+ERROR ANALYSIS:
+This is a ValueError caused by applying the wrong format to a specific date string. Because it was triggered at position 2, the first date string must have successfully converted. By looking at the defined variables, I can see that first date string is in the format "Mar 7, 2025", but the third date string is in the format "25 June, 2024". Those dates are not in the same format, so the conversion failed.
+
+INTENT ANALYSIS:
+User is trying to convert the date column to a datetime object even though the dates are not in the same starting format. 
+
+SOLUTION:
+```python
+def parse_date(date_str):
+    formats = ['%b %d, %Y', '%d %B, %Y']
+    
+    for fmt in formats:
+        try:
+            return pd.to_datetime(date_str, format=fmt)
+        except ValueError:
+            # Try the next format
+            continue
+            
+    # If no format worked, return Not a Time
+    return pd.NaT
+
+df['date'] = df['date'].apply(lambda x: parse_date(x))
+```
+
+Since the dates are not in a consistent format, we need to first figure out which format to use for each date string and then use that format to convert the date.
+
+The best way to do this is with a function. We can call this function `parse_date`.
+</Example 2>
+
+
+Guidelines for Solutions:
+
+Error Analysis:
+
+- Identify error type (Syntax, Runtime, Logic).
+- Use the defined variables and code in the active cell to understand the error.
+- Consider kernel state and execution order
+
+Intent Preservation:
+
+- Try to understand the user's intent using the defined variables and code in the active cell.
+
+Solution Requirements:
+
+- Return the full code cell with the error fixed and a short explanation of the error.
+- Only update code in the active cell. Do not update other code in the notebook.
+- Propose a solution that fixes the error and does not change the user's intent.
+- Make the solution as simple as possible.
+- Reuse as much of the existing code as possible.
+- Do not add temporary comments like '# Fixed the typo here' or '# Added this line to fix the error'
+- The code in the SOLUTION section should be a python code block starting with ```python and ending with ```
+
+Here is your task. 
+
+Defined Variables:
+{notebook_state.global_vars}
+
+Code in active cell:
+```python
+{notebook_state.cell_contents[-1] if len(notebook_state.cell_contents) > 0 else ""}
+```
+
+Error Traceback:
+{error_message}
+
+ERROR ANALYSIS:
+
+INTENT ANALYSIS:
+
+SOLUTION:
+"""
+
+# Module-level instance; this is the name exported via __all__ and listed in
+# SMART_DEBUG_PROMPT_GENERATORS by evals/prompts/smart_debug_prompts/__init__.py.
+prod_prompt_v3_generator = _ProdPromptV3Generator()
diff --git a/evals/requirements.txt b/evals/requirements.txt
@@ -3,4 +3,5 @@ types-setuptools
 openai>=1.0.0
 prettytable>=3.0.0
 pandas>=2.0.0
-matplotlib>=3.0.0
+matplotlib>=3.0.0
+ipython
diff --git a/evals/test_cases/smart_debug_tests/__init__.py b/evals/test_cases/smart_debug_tests/__init__.py
@@ -8,10 +8,11 @@
 from evals.test_cases.smart_debug_tests.import_tests import IMPORT_TESTS
 from evals.test_cases.smart_debug_tests.matplotlib_tests import MATPLOTLIB_TESTS
 
+
 SMART_DEBUG_TESTS: List[SmartDebugTestCase] = [
     *TESTS,
     *FUNCTION_TESTS,
     *PANDAS_TESTS,
     *IMPORT_TESTS,
-    *MATPLOTLIB_TESTS
+    *MATPLOTLIB_TESTS,
 ]
diff --git a/evals/test_runners/smart_debugger_test_runner.py b/evals/test_runners/smart_debugger_test_runner.py
@@ -8,6 +8,10 @@
 from evals.test_cases.smart_debug_tests import SMART_DEBUG_TESTS
 from evals.test_runners.utils import exec_code_and_get_globals_and_output
 from evals.utils import get_script_from_cells, print_test_case_result_tables
+from IPython.core.interactiveshell import InteractiveShell
+from io import StringIO
+import sys
+import re
 
 
 def run_smart_debug_tests(test_name: Optional[str], prompt_name: Optional[str], tags: Optional[List[str]], model: Optional[str]):
@@ -64,18 +68,13 @@ def run_smart_debug_test(test: SmartDebugTestCase, prompt_generator: DebugPrompt
     # into a single script when we execute it anyways. 
     invalid_notebook_state.cell_contents.append(test.invalid_code)
     invalid_code_cells_script = get_script_from_cells(invalid_notebook_state.cell_contents, include_current_cell=True)
-
+    
     # Exec the invalid code and get the error message
-    error_message = None
-    try:
-        exec(invalid_code_cells_script, {})
-    except Exception as e:
-        error_type = e.__class__.__name__
-        error_message = f"{error_type}: {str(e)}"
+    error_message = get_structured_error(invalid_code_cells_script)
 
-    print(f"Error message: {error_message}")
+    #print(f"Error message: {error_message}")
     if error_message is None:
-        raise ValueError("Broken Test: Test did not produce an error.")
+        print("Broken Test: Test did not produce an error.")
 
     # Ask the AI to correct the error
     # Make sure to use the invalid_notebook_state so that the prompt can include the 
@@ -117,3 +116,69 @@ def run_smart_debug_test(test: SmartDebugTestCase, prompt_generator: DebugPrompt
         print(f"Actual output: {actual_output}\n")
 
     return TestCaseResult(test=test, passed=passed)
+
+
+def get_structured_error(code):
+    ipython = InteractiveShell.instance()
+    stdout_capture = StringIO()
+    original_stdout = sys.stdout
+
+    try:
+        sys.stdout = stdout_capture
+        result = ipython.run_cell(code)
+
+        if result.error_before_exec or result.error_in_exec:
+            full_traceback = strip_ansi_codes(stdout_capture.getvalue())
+
+            # Close the stdout capture
+            sys.stdout = original_stdout
+            stdout_capture.close()
+
+            lines = full_traceback.split('\n')
+
+            filtered_lines = []
+            capturing = False
+
+            for line in lines:
+                # Always include error headers
+                if '--------------------' in line:
+                    filtered_lines.append(line)
+                    continue
+
+                # Start capturing when we see a Cell block
+                if line.strip().startswith('Cell In['):
+                    capturing = True
+                    filtered_lines.append(line)
+                    continue
+
+                # Keep capturing until we hit an empty line
+                if capturing:
+                    if line.strip() == '':
+                        filtered_lines.append('')
+                        capturing = False
+                    else:
+                        filtered_lines.append(line)
+
+            # Always include the final, non-empty line
+            # This is the last line that is not ""
+            non_empty_lines = [line for line in lines if line != ""]
+            filtered_lines.append(non_empty_lines[-1])
+
+            sys.stdout = original_stdout  # Restore stdout before printing
+            return '\n'.join(filtered_lines)
+        else: 
+            sys.stdout = original_stdout  # Restore stdout before printing
+            print("Test Failure 1: No error was produced")
+            return None
+    except Exception as e:
+        sys.stdout = original_stdout  # Restore stdout before printing
+        print(f"Test Failure 2: Error running code in IPython shell: {e}")
+        return None
+    finally:
+        sys.stdout = original_stdout
+        stdout_capture.close()
+
+def strip_ansi_codes(text):
+    """Remove ANSI escape sequences from text"""
+    ansi_escape = re.compile(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
+    return ansi_escape.sub('', text)
diff --git a/mito-ai/mito_ai/handlers.py b/mito-ai/mito_ai/handlers.py
@@ -28,9 +28,9 @@
     InlineCompletionMessageBuilder,
     SmartDebugMessageBuilder
 )
-from .prompt_builders import remove_inner_thoughts_from_message
-from .providers import OpenAIProvider
-from .utils.create import initialize_user
+from mito_ai.prompt_builders.smart_debug_prompt import remove_inner_thoughts_from_message
+from mito_ai.providers import OpenAIProvider
+from mito_ai.utils.create import initialize_user
 from mito_ai.providers import OpenAIProvider
 from mito_ai.utils.create import initialize_user
 from mito_ai.utils.version_utils import is_pro
@@ -111,6 +111,7 @@ async def on_message(self, message: str) -> None: # type: ignore
         Args:
             message: The message received on the WebSocket.
         """
+
         # first, verify that the message is an `CompletionRequest`.
         self.log.debug("Message received: %s", message)
         try:

diff --git a/mito-ai/mito_ai/models.py b/mito-ai/mito_ai/models.py
@@ -7,13 +7,11 @@
 from pydantic import BaseModel
 from openai.types.chat import ChatCompletionMessageParam
 
-from mito_ai.prompt_builders import (
-    create_chat_prompt,
-    create_inline_prompt,
-    create_explain_code_prompt,
-    create_error_prompt,
-    create_agent_prompt,
-)
+from mito_ai.prompt_builders.chat_prompt import create_chat_prompt
+from mito_ai.prompt_builders.inline_completer_prompt import create_inline_prompt
+from mito_ai.prompt_builders.explain_code_prompt import create_explain_code_prompt
+from mito_ai.prompt_builders.smart_debug_prompt import create_error_prompt
+from mito_ai.prompt_builders.agent_planning_prompt import create_agent_prompt
 
 CompletionIncomingMessageTypes = Literal['chat', 'inline_completion', 'codeExplain', 'smartDebug', 'agent:planning']
 IncomingMessageTypes = Union[Literal['clear_history', 'fetch_history'], CompletionIncomingMessageTypes]