Added v 1.6

haseeb-heaven · Dec 1, 2023 · 3f4b01a · 3f4b01a
1 parent 3e2fffa
commit 3f4b01a
Show file tree

Hide file tree

Showing 7 changed files with 118 additions and 15 deletions.
diff --git a/README.md b/README.md
@@ -254,12 +254,13 @@ If you're interested in contributing to **Open-Code-Interpreter**, we'd love to
 
 ## 📌 **Versioning**
 
-**v1.0** - Initial release.</br>
-**v1.1** - Added Graphs and Charts support.</br>
-**v1.2** - Added LiteLLM Support.</br>
-**v1.3** - Added Gpt 3.5 Support.</br>
-**v1.4** - Added PALM 2 Support.</br>
-**v1.5** - Added Gpt 3.5/4 models official Support.</br>
+🚀 ***v1.0*** - Initial release.</br>
+📊 ***v1.1*** - Added **Graphs** and **Charts** support.</br>
+🔥 ***v1.2*** - Added **LiteLLM** Support.</br>
+🌟 ***v1.3*** - Added **GPT 3.5** Support.</br>
+🌴 ***v1.4*** - Added **PALM 2** Support.</br>
+🎉 ***v1.5*** - Added **GPT 3.5/4** models official Support.</br>
+📝 ***v1.6*** - Updated Code Interpreter to support document files (***JSON***, ***CSV***, ***XML***).</br>
 
 ## 📜 **License**
 

diff --git a/configs/code-llama.config b/configs/code-llama.config
@@ -11,7 +11,7 @@
  end_sep = ```
 
  # If True, the first line of the generated text will be skipped.
- skip_first_line = False
+ skip_first_line = True
 
  # The model used for generating the code.
  HF_MODEL = codellama/CodeLlama-34b-Instruct-hf

diff --git a/configs/gpt-3.5-turbo.config b/configs/gpt-3.5-turbo.config
@@ -14,4 +14,7 @@ end_sep = ```
 skip_first_line = True
 
 # The model used for generating the code.
-HF_MODEL = gpt-3.5-turbo
+HF_MODEL = gpt-3.5-turbo
+
+# Custom API base URL for GPT 3.5 Turbo.
+api_base = https://heaven-gpt.haseebmir.repl.co
diff --git a/configs/gpt-4.config b/configs/gpt-4.config
@@ -14,4 +14,7 @@ end_sep = ```
 skip_first_line = True
 
 # The model used for generating the code.
-HF_MODEL = gpt-4
+HF_MODEL = gpt-4
+
+# Use the Custom GPT-4 model.
+api_base = https://heaven-gpt.haseebmir.repl.co
diff --git a/interpreter.py b/interpreter.py
@@ -12,7 +12,7 @@
 --display_code, -dc: Displays the generated code in the output.
 
 Author: HeavenHM
-Date: 2023/10/12
+Date: 2023/12/01
 """
 
 from libs.interpreter_lib import Interpreter

diff --git a/libs/interpreter_lib.py b/libs/interpreter_lib.py
@@ -27,7 +27,7 @@
 class Interpreter:
     logger = None
     client = None
-    interpreter_version = "1.5"
+    interpreter_version = "1.6"
 
     def __init__(self, args):
         self.args = args
@@ -302,7 +302,6 @@ def interpreter_main(self):
 
         while True:
             try:
-
                 task = input("> ")
                 if task.lower() in ['exit', 'quit']:
                     break
@@ -311,15 +310,63 @@ def interpreter_main(self):
                 # Clean the responses
                 self._clean_responses()
 
+                # Check if prompt contains any file uploaded by user.
+                extracted_name = self.utility_manager.extract_file_name(prompt)
+                self.logger.info(f"Input prompt extracted_name: '{extracted_name}'")
+
+                if extracted_name is not None:
+                    full_path = self.utility_manager.get_full_file_path(extracted_name)
+                    self.logger.info(f"Input prompt full_path: '{full_path}'")
+
+
+                    # Check if the file exists and is a file
+                    if os.path.isfile(full_path):
+                        # Check if file size is less than 50 KB
+                        file_size = os.path.getsize(full_path)
+                        self.logger.info(f"Input prompt file_size: '{file_size}'")
+                        if file_size < 50000:
+                            try:
+                                with open(full_path, 'r', encoding='utf-8') as file:
+                                    # Check if file extension is .json, .csv, or .xml
+                                    file_extension = os.path.splitext(full_path)[1].lower()
+
+                                    if file_extension in ['.json','.xml']:
+                                        # Read only the first 20 lines of the file
+                                        file_data = '\n'.join(file.readline() for _ in range(20))
+                                        self.logger.info(f"Input prompt JSON/XML file_data: '{str(file_data)}'")
+
+                                    elif file_extension == '.csv':
+                                        # Read only headers of the csv file
+                                        file_data = self.utility_manager.read_csv_headers(full_path)
+                                        self.logger.info(f"Input prompt CSV file_data: '{str(file_data)}'")
+
+                                    else:
+                                        file_data = file.read()
+                                        self.logger.info(f"Input prompt file_data: '{str(file_data)}'")
+
+                                    if any(word in prompt.lower() for word in ['graph', 'graphs', 'chart', 'charts']):
+                                        prompt += "\n" + "This is file data from user input: " + str(file_data) + " use this to analyze the data."
+                                        self.logger.info(f"Input Prompt: '{prompt}'")
+                                    else:
+                                        self.logger.info("The prompt does not contain both 'graph' and 'chart'.")
+                            except Exception as exception:
+                                self.logger.error(f"Error reading file: {exception}")
+                        else:
+                            self.logger.error("File size is greater.")
+                    else:
+                        self.logger.error("File does not exist or is not a file.")                         
+                else:
+                    self.logger.info("No file name found in the prompt.")
+
                 # If graph were requested.
-                if 'graph' in prompt.lower():
+                if any(word in prompt.lower() for word in ['graph', 'graphs']):
                     if self.INTERPRETER_LANGUAGE == 'python':
                         prompt += "\n" + "using Python use Matplotlib save the graph in file called 'graph.png'"
                     elif self.INTERPRETER_LANGUAGE == 'javascript':
                         prompt += "\n" + "using JavaScript use Chart.js save the graph in file called 'graph.png'"
 
                 # if Chart were requested
-                if 'chart' in prompt.lower() or 'plot' in prompt.lower():
+                if any(word in prompt.lower() for word in ['chart', 'charts', 'plot', 'plots']):    
                     if self.INTERPRETER_LANGUAGE == 'python':
                         prompt += "\n" + "using Python use Plotly save the chart in file called 'chart.png'"
                     elif self.INTERPRETER_LANGUAGE == 'javascript':
@@ -331,13 +378,16 @@ def interpreter_main(self):
                         prompt += "\n" + "using Python use Pandas save the table in file called 'table.md'"
                     elif self.INTERPRETER_LANGUAGE == 'javascript':
                         prompt += "\n" + "using JavaScript use DataTables save the table in file called 'table.html'"
-
+
+                # Start the LLM Request.     
                 self.logger.info(f"Prompt: {prompt}")
                 generated_output = self.generate_text(prompt, self.history, config_values=self.config_values)
 
+                # Extract the code from the generated output.
                 self.logger.info(f"Generated output type {type(generated_output)}")
                 extracted_code = self.code_interpreter.extract_code(generated_output, start_sep, end_sep, skip_first_line,self.CODE_MODE)
 
+                # Display the extracted code.
                 self.logger.info(f"Extracted code: {extracted_code[:50]}")
 
 

diff --git a/libs/utility_manager.py b/libs/utility_manager.py
@@ -1,7 +1,11 @@
 import json
 import os
+import re
 from libs.logger import initialize_logger
 import traceback
+import csv
+import pandas as pd
+from xml.etree import ElementTree as ET
 
 class UtilityManager:
     def __init__(self):
@@ -98,3 +102,45 @@ def read_config_file(self, filename=".config"):
         except Exception as exception:
             self.logger.error(f"Error in reading config file: {str(exception)}")
             raise
+
+    def extract_file_name(self, prompt):
+        """Return the first supported file name or path mentioned in *prompt*.
+
+        Every file-like token in the prompt is considered in order of
+        appearance. Tokens whose extension is not supported (for example a
+        version string such as "v1.5") are skipped instead of ending the
+        search, so a supported file that appears later is still found.
+
+        Args:
+            prompt: The user prompt text to scan.
+
+        Returns:
+            The matched file name or path as written in the prompt, or None
+            when the prompt is empty or contains no token with a supported
+            extension.
+        """
+        if not prompt:
+            return None
+
+        # Matches, in order of preference at each position: a Windows drive
+        # path, a POSIX absolute path, or a bare "name.ext" token.
+        pattern = r"([a-zA-Z]:\\(?:[\w\-\.]+\\)*[\w\-\.]+\.\w+|/(?:[\w\-\.]+/)*[\w\-\.]+\.\w+|\b[\w\-\.]+\.\w+\b)"
+
+        # NOTE(review): '.xls' is a binary spreadsheet format although this
+        # list is meant for non-binary types - confirm it belongs here.
+        supported_extensions = ['.json', '.csv', '.xml', '.xls', '.txt','.md','.html']
+
+        for match in re.finditer(pattern, prompt):
+            file_name = match.group()
+            file_extension = os.path.splitext(file_name)[1].lower()
+            self.logger.info(f"File extension: '{file_extension}'")
+            if file_extension in supported_extensions:
+                self.logger.info(f"File name: '{file_name}'")
+                return file_name
+
+        return None
+
+    def get_full_file_path(self, file_name):
+        """Resolve *file_name* to an absolute path.
+
+        Args:
+            file_name: A file name or path; may be None or empty.
+
+        Returns:
+            None when *file_name* is None or empty, *file_name* unchanged
+            when it is already absolute, otherwise *file_name* joined onto
+            the current working directory.
+        """
+        # Nothing to resolve for None or an empty string.
+        if not file_name:
+            return None
+
+        # Absolute paths pass through untouched.
+        if os.path.isabs(file_name):
+            return file_name
+
+        # Relative paths are anchored at the current working directory.
+        return os.path.join(os.getcwd(), file_name)
+
+    def read_csv_headers(self, file_path):
+        """Return the first row (the header row) of the CSV file at *file_path*.
+
+        The file is decoded as UTF-8. A leading byte-order mark, if present,
+        is dropped so it does not end up inside the first column name.
+
+        Args:
+            file_path: Path of the CSV file to read.
+
+        Returns:
+            A list with the fields of the first row, or an empty list when
+            the file cannot be opened or contains no rows.
+
+        Raises:
+            UnicodeDecodeError: If the file is not valid UTF-8.
+        """
+        try:
+            # 'utf-8-sig' reads plain UTF-8 and UTF-8 with a BOM alike, and
+            # makes decoding independent of the platform's default encoding.
+            with open(file_path, newline='', encoding='utf-8-sig') as csvfile:
+                reader = csv.reader(csvfile)
+                headers = next(reader)
+                return headers
+        except IOError as exception:
+            self.logger.error(f"IOError: {exception}")
+            return []
+        except StopIteration:
+            # next() found no rows at all.
+            self.logger.error("CSV file is empty.")
+            return []