Skip to content

Commit

Permalink
Merge pull request #1517 from mito-ds/smartdebug-full-error
Browse files Browse the repository at this point in the history
Smartdebug full error
  • Loading branch information
aarondr77 authored Feb 11, 2025
2 parents ad8648d + 90c88ac commit c31ed7d
Show file tree
Hide file tree
Showing 22 changed files with 854 additions and 465 deletions.
3 changes: 1 addition & 2 deletions evals/ai_api_calls/get_open_ai_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@
def get_open_ai_completion(prompt: str, model: str) -> str:
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))


completion_function_params = {
"model": model,
"stream": True,
"stream": False,
"messages": [
{"role": "system", "content": "You are an expert Python programmer."},
{"role": "user", "content": prompt}
Expand Down
6 changes: 4 additions & 2 deletions evals/prompts/smart_debug_prompts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@

from evals.prompts.smart_debug_prompts.prod_prompt_v1 import prod_prompt_v1_generator
from evals.prompts.smart_debug_prompts.prod_prompt_v2 import prod_prompt_v2_generator

from evals.prompts.smart_debug_prompts.prod_prompt_v3 import prod_prompt_v3_generator

SMART_DEBUG_PROMPT_GENERATORS = [
prod_prompt_v1_generator,
prod_prompt_v2_generator
prod_prompt_v2_generator,
prod_prompt_v3_generator
]
155 changes: 155 additions & 0 deletions evals/prompts/smart_debug_prompts/prod_prompt_v3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@

from evals.eval_types import DebugPromptGenerator, NotebookState

__all__ = ["prod_prompt_v3_generator"]

class _ProdPromptV3Generator(DebugPromptGenerator):
    """Third version of the production smart-debug prompt.

    The prompt consists of two worked examples, a list of solution
    guidelines, and finally the task itself: the defined variables, the code
    of the active cell and the error traceback.
    """

    prompt_name = "prod_prompt_v3"

    def get_prompt(self, error_message: str, notebook_state: NotebookState) -> str:
        """Build the prompt text for one failing cell.

        Args:
            error_message: Text inserted under "Error Traceback:".
            notebook_state: Supplies ``global_vars`` (inserted under
                "Defined Variables:") and ``cell_contents``, whose last
                entry is used as the active cell.

        Returns:
            The fully formatted prompt string.
        """
        defined_variables = notebook_state.global_vars

        # The active cell is the last one; fall back to an empty string when
        # the notebook has no cells at all.
        cells = notebook_state.cell_contents
        active_cell_code = cells[-1] if len(cells) > 0 else ""

        prompt = f"""You are debugging code in a JupyterLab 4 notebook. Analyze the error and provide a solution that maintains the original intent.
<Example 1>
Defined Variables:
{{
'revenue_multiplier': 1.5,
'sales_df': pd.DataFrame({{
'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
'units_sold': [1, 2, 1, 4, 5],
'total_price': [10, 19.98, 13.99, 84.00, 500]
}})
}}
Code in active cell:
```python
sales_df['total_revenue'] = sales_df['price'] * revenue_multiplier
```
Error Traceback:
Cell In[24], line 9
2 revenue_multiplier = 1.5
3 sales_df = pd.DataFrame({{
4 'transaction_date': ['2024-01-02', '2024-01-02', '2024-01-02', '2024-01-02', '2024-01-03'],
5 'price_per_unit': [10, 9.99, 13.99, 21.00, 100],
6 'units_sold': [1, 2, 1, 4, 5],
7 'total_price': [10, 19.98, 13.99, 84.00, 500]
8 }})
----> 9 sales_df['total_revenue'] = sales_df['price'] * revenue_multiplier
KeyError: 'price'
ERROR ANALYSIS:
Runtime error: Attempted to access non-existent DataFrame column
INTENT ANALYSIS:
User is trying to calculate total revenue by applying a multiplier to transaction prices. Based on the defined variables, the column that the user is tring to access is likely `total_price` because that would allow them to calculate the total revenue for each transaction.
SOLUTION:
```python
sales_df['total_revenue'] = sales_df['total_price'] * revenue_multiplier
```
The DataFrame contains 'total_price' rather than 'price'. Updated column reference to match existing data structure.
</Example 1>
<Example 2>
Defined Variables:
{{
'df': pd.DataFrame({{
'order_id': [1, 2, 3, 4],
'date': ['Mar 7, 2025', 'Sep 24, 2024', '25 June, 2024', 'June 29, 2024'],
'amount': [100, 150, 299, 99]
}})
}}
Code in active cell:
```python
df['date'] = pd.to_datetime(df['date'])
```
Error Traceback:
Cell In[27], line 1
----> 1 df['date'] = pd.to_datetime(df['date'])
ValueError: time data "25 June, 2024" doesn't match format "%b %d, %Y", at position 2. You might want to try:
- passing `format` if your strings have a consistent format;
- passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;
- passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this.
ERROR ANALYSIS:
This is a ValueError caused by applying the wrong format to a specific date string. Because it was triggered at position 2, the first date string must have successfully converted. By looking at the defined variables, I can see that first date string is in the format "Mar 7, 2025", but the third date string is in the format "25 June, 2024". Those dates are not in the same format, so the conversion failed.
INTENT ANALYSIS:
User is trying to convert the date column to a datetime object even though the dates are not in the same starting format.
SOLUTION:
```python
def parse_date(date_str):
formats = ['%b %d, %Y', '%d %B, %Y']
for fmt in formats:
try:
return pd.to_datetime(date_str, format=fmt)
except ValueError:
# Try the next format
continue
# If no format worked, return Not a Time
return pd.NaT
df['date'] = df['date'].apply(lambda x: parse_date(x))
```
Since the dates are not in a consistent format, we need to first figure out which format to use for each date string and then use that format to convert the date.
The best way to do this is with a function. We can call this function `parse_date`.
</Example 2>
Guidelines for Solutions:
Error Analysis:
- Identify error type (Syntax, Runtime, Logic).
- Use the defined variables and code in the active cell to understand the error.
- Consider kernel state and execution order
Intent Preservation:
- Try to understand the user's intent using the defined variables and code in the active cell.
Solution Requirements:
- Return the full code cell with the error fixed and a short explanation of the error.
- Only update code in the active cell. Do not update other code in the notebook.
- Propose a solution that fixes the error and does not change the user's intent.
- Make the solution as simple as possible.
- Reuse as much of the existing code as possible.
- Do not add temporary comments like '# Fixed the typo here' or '# Added this line to fix the error'
- The code in the SOLUTION section should be a python code block starting with ```python and ending with ```
Here is your task.
Defined Variables:
{defined_variables}
Code in active cell:
```python
{active_cell_code}
```
Error Traceback:
{error_message}
ERROR ANALYSIS:
INTENT ANALYSIS:
SOLUTION:
"""
        return prompt


# Module-level instance exported through ``__all__``.
prod_prompt_v3_generator = _ProdPromptV3Generator()
3 changes: 2 additions & 1 deletion evals/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ types-setuptools
openai>=1.0.0
prettytable>=3.0.0
pandas>=2.0.0
matplotlib>=3.0.0
matplotlib>=3.0.0
ipython
3 changes: 2 additions & 1 deletion evals/test_cases/smart_debug_tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
from evals.test_cases.smart_debug_tests.import_tests import IMPORT_TESTS
from evals.test_cases.smart_debug_tests.matplotlib_tests import MATPLOTLIB_TESTS


SMART_DEBUG_TESTS: List[SmartDebugTestCase] = [
*TESTS,
*FUNCTION_TESTS,
*PANDAS_TESTS,
*IMPORT_TESTS,
*MATPLOTLIB_TESTS
*MATPLOTLIB_TESTS,
]
83 changes: 74 additions & 9 deletions evals/test_runners/smart_debugger_test_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
from evals.test_cases.smart_debug_tests import SMART_DEBUG_TESTS
from evals.test_runners.utils import exec_code_and_get_globals_and_output
from evals.utils import get_script_from_cells, print_test_case_result_tables
from IPython.core.interactiveshell import InteractiveShell
from io import StringIO
import sys
import re


def run_smart_debug_tests(test_name: Optional[str], prompt_name: Optional[str], tags: Optional[List[str]], model: Optional[str]):
Expand Down Expand Up @@ -64,18 +68,13 @@ def run_smart_debug_test(test: SmartDebugTestCase, prompt_generator: DebugPrompt
# into a single script when we execute it anyways.
invalid_notebook_state.cell_contents.append(test.invalid_code)
invalid_code_cells_script = get_script_from_cells(invalid_notebook_state.cell_contents, include_current_cell=True)

# Exec the invalid code and get the error message
error_message = None
try:
exec(invalid_code_cells_script, {})
except Exception as e:
error_type = e.__class__.__name__
error_message = f"{error_type}: {str(e)}"
error_message = get_structured_error(invalid_code_cells_script)

print(f"Error message: {error_message}")
#print(f"Error message: {error_message}")
if error_message is None:
raise ValueError("Broken Test: Test did not produce an error.")
print("Broken Test: Test did not produce an error.")

# Ask the AI to correct the error
# Make sure to use the invalid_notebook_state so that the prompt can include the
Expand Down Expand Up @@ -117,3 +116,69 @@ def run_smart_debug_test(test: SmartDebugTestCase, prompt_generator: DebugPrompt
print(f"Actual output: {actual_output}\n")

return TestCaseResult(test=test, passed=passed)


def _filter_traceback_lines(full_traceback):
    """Reduce a traceback to separator lines, ``Cell In[...]`` blocks and the final line.

    Args:
        full_traceback: Traceback text with ANSI codes already removed.

    Returns:
        The kept lines joined with newlines. An empty string is returned
        for input that contains no non-blank line.
    """
    lines = full_traceback.split('\n')
    filtered_lines = []
    capturing = False

    for line in lines:
        stripped = line.strip()
        if '--------------------' in line:
            # Always include error headers
            filtered_lines.append(line)
        elif stripped.startswith('Cell In['):
            # Start capturing when we see a Cell block
            capturing = True
            filtered_lines.append(line)
        elif capturing:
            # Keep capturing until we hit an empty line
            if stripped == '':
                filtered_lines.append('')
                capturing = False
            else:
                filtered_lines.append(line)

    # Always include the final non-blank line (normally "ErrorType: message"),
    # but do not repeat it when the Cell block above already captured it,
    # and do not fail when there is no output at all.
    non_empty_lines = [line for line in lines if line.strip()]
    if non_empty_lines:
        final_line = non_empty_lines[-1]
        last_kept = next(
            (line for line in reversed(filtered_lines) if line.strip()), None
        )
        if last_kept != final_line:
            filtered_lines.append(final_line)

    return '\n'.join(filtered_lines)


def get_structured_error(code):
    """Run ``code`` in the shared IPython shell and return a trimmed traceback.

    Whatever the shell prints to stdout while running the cell is captured,
    stripped of ANSI escape codes and filtered down to the parts relevant
    for the debugging prompt.

    Args:
        code: Source code to execute as a single cell.

    Returns:
        The filtered traceback text when the cell reported an error, or
        ``None`` when it ran cleanly or the shell itself failed (a
        diagnostic line is printed in both of those cases).
    """
    ipython = InteractiveShell.instance()
    stdout_capture = StringIO()
    original_stdout = sys.stdout

    try:
        try:
            sys.stdout = stdout_capture
            result = ipython.run_cell(code)
        finally:
            # Restore stdout in exactly one place, before any diagnostics
            # below are printed.
            sys.stdout = original_stdout

        if not (result.error_before_exec or result.error_in_exec):
            print("Test Failure 1: No error was produced")
            return None

        full_traceback = strip_ansi_codes(stdout_capture.getvalue())
        return _filter_traceback_lines(full_traceback)
    except Exception as e:
        print(f"Test Failure 2: Error running code in IPython shell: {e}")
        return None
    finally:
        stdout_capture.close()

def strip_ansi_codes(text):
    """Return ``text`` with every ANSI escape sequence deleted."""
    # ESC followed by either a single character in the ranges @-Z or \-_,
    # or by a "[" sequence: parameter characters, intermediate characters
    # and one final character.
    return re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', text)
7 changes: 4 additions & 3 deletions mito-ai/mito_ai/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@
InlineCompletionMessageBuilder,
SmartDebugMessageBuilder
)
from .prompt_builders import remove_inner_thoughts_from_message
from .providers import OpenAIProvider
from .utils.create import initialize_user
from mito_ai.prompt_builders.smart_debug_prompt import remove_inner_thoughts_from_message
from mito_ai.providers import OpenAIProvider
from mito_ai.utils.create import initialize_user
from mito_ai.providers import OpenAIProvider
from mito_ai.utils.create import initialize_user
from mito_ai.utils.version_utils import is_pro
Expand Down Expand Up @@ -111,6 +111,7 @@ async def on_message(self, message: str) -> None: # type: ignore
Args:
message: The message received on the WebSocket.
"""

# First, verify that the message is a `CompletionRequest`.
self.log.debug("Message received: %s", message)
try:
Expand Down
12 changes: 5 additions & 7 deletions mito-ai/mito_ai/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@
from pydantic import BaseModel
from openai.types.chat import ChatCompletionMessageParam

from mito_ai.prompt_builders import (
create_chat_prompt,
create_inline_prompt,
create_explain_code_prompt,
create_error_prompt,
create_agent_prompt,
)
from mito_ai.prompt_builders.chat_prompt import create_chat_prompt
from mito_ai.prompt_builders.inline_completer_prompt import create_inline_prompt
from mito_ai.prompt_builders.explain_code_prompt import create_explain_code_prompt
from mito_ai.prompt_builders.smart_debug_prompt import create_error_prompt
from mito_ai.prompt_builders.agent_planning_prompt import create_agent_prompt

CompletionIncomingMessageTypes = Literal['chat', 'inline_completion', 'codeExplain', 'smartDebug', 'agent:planning']
IncomingMessageTypes = Union[Literal['clear_history', 'fetch_history'], CompletionIncomingMessageTypes]
Expand Down
Loading

0 comments on commit c31ed7d

Please sign in to comment.