diff --git a/interpreter/core/llm/llm.py b/interpreter/core/llm/llm.py
index 15f137d83b..698af05c44 100644
--- a/interpreter/core/llm/llm.py
+++ b/interpreter/core/llm/llm.py
@@ -311,6 +311,9 @@ def load(self):
         if self._is_loaded:
             return
 
+        if self.model.startswith("ollama/") and not ":" in self.model:
+            self.model = self.model + ":latest"
+
         self._is_loaded = True
 
         if self.model.startswith("ollama/"):
@@ -323,7 +326,7 @@ def load(self):
             if response.ok:
                 data = response.json()
                 names = [
-                    model["name"].replace(":latest", "")
+                    model["name"]
                     for model in data["models"]
                     if "name" in model and model["name"]
                 ]
@@ -358,6 +361,7 @@ def load(self):
                 self.max_tokens = int(self.context_window * 0.2)
 
             # Send a ping, which will actually load the model
+            model_name = model_name.replace(":latest", "")
             print(f"Loading {model_name}...\n")
 
             old_max_tokens = self.max_tokens
@@ -398,6 +402,8 @@ def fixed_litellm_completions(**params):
     else:
         litellm.drop_params = True
 
+    params["model"] = params["model"].replace(":latest", "")
+
     # Run completion
     attempts = 4
     first_error = None
diff --git a/interpreter/terminal_interface/local_setup.py b/interpreter/terminal_interface/local_setup.py
index 41a6eb8aa0..f802265c6d 100644
--- a/interpreter/terminal_interface/local_setup.py
+++ b/interpreter/terminal_interface/local_setup.py
@@ -257,7 +257,7 @@ def download_model(models_dir, models, interpreter):
        names=[name for name in names if not any(word.lower() in name.lower() for word in priority_models)]
        names=priority_models_found+names
 
-        for model in ["llama3", "phi3", "wizardlm2", "codestral"]:
+        for model in ["llama3.1", "phi3", "mistral-nemo", "gemma2", "codestral"]:
            if model not in names:
                names.append("↓ Download " + model)
 
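
The net effect of the llm.py changes is a round-trip on the `:latest` tag: an untagged `ollama/` model name gets `:latest` appended internally (so it matches the names returned by ollama's `/api/tags` endpoint, which are now kept verbatim instead of having `:latest` stripped), and the tag is then removed again before the name is printed or passed to the litellm completion call. Below is a minimal standalone sketch of that normalization; the helper names `normalize_ollama_model` and `display_name` are illustrative, not part of the patch.

```python
# Sketch of the ":latest" handling introduced by this patch (helper names
# are hypothetical; the real logic lives inline in load() and
# fixed_litellm_completions()).

def normalize_ollama_model(model: str) -> str:
    """Append ":latest" to an untagged ollama model so it matches the
    tagged names reported by the ollama /api/tags endpoint."""
    if model.startswith("ollama/") and ":" not in model:
        return model + ":latest"
    return model

def display_name(model: str) -> str:
    """Strip ":latest" again for user-facing output and for the
    completion call, mirroring load() and fixed_litellm_completions()."""
    return model.replace(":latest", "")

# Untagged names gain an explicit tag; explicit tags are left alone.
assert normalize_ollama_model("ollama/llama3.1") == "ollama/llama3.1:latest"
assert normalize_ollama_model("ollama/llama3.1:8b") == "ollama/llama3.1:8b"
assert display_name("ollama/llama3.1:latest") == "ollama/llama3.1"
```

This keeps the internal model string comparable against the tagged names in `/api/tags` while avoiding a redundant `:latest` suffix in logs and in the model id sent to litellm. The local_setup.py change is independent: it refreshes the suggested download list (`llama3.1`, `phi3`, `mistral-nemo`, `gemma2`, `codestral`), replacing the older `llama3` and `wizardlm2` entries.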