Commit
remove last legacy config distributors
latekvo committed May 24, 2024
1 parent 82ab16c commit ea927a5
Showing 2 changed files with 11 additions and 9 deletions.
6 changes: 0 additions & 6 deletions arguments.py
@@ -113,9 +113,3 @@ def get_runtime_config():
        llm_config=llm_config,
        embedder_config=embedder_config,
    )
-
-
-# todo: this is a legacy method of distributing config, remove ASAP
-LLM_CHOICE = args.llm_choice
-EMBEDDER_CHOICE = args.embed_choice
-USE_HUGGING_FACE = False # temporarily disabled HF
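With the legacy globals gone, callers are expected to pull settings from the runtime config object instead. A minimal sketch of that access pattern; the import path and assignment happen in code collapsed out of this diff, so the exact wiring is an assumption:

# assumed wiring; matches the usage visible in core/tools/model_loader.py below
from arguments import get_runtime_config

runtime_configuration = get_runtime_config()
llm_config = runtime_configuration.llm_config
embedder_config = runtime_configuration.embedder_config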
14 changes: 11 additions & 3 deletions core/tools/model_loader.py
@@ -9,19 +9,26 @@
llm_config = runtime_configuration.llm_config
embedder_config = runtime_configuration.embedder_config

# problem with the caching requested in the todos below: the singletons have to be shared across instances


def load_ollama_model():
    # todo: same here: once caching is added, use singletons to avoid crashes
    llm = Ollama(model=llm_config.model_name)
    embeddings = OllamaEmbeddings(model=embedder_config.model_name)
    return llm, embeddings
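A minimal sketch of the singleton caching these todos ask for, not part of this commit: functools.lru_cache on a zero-argument function yields exactly one shared instance per process, which is what the comment about sharing across instances seems to want.

from functools import lru_cache


# sketch only: the first call loads the models, every later call
# returns the same cached (llm, embeddings) pair
@lru_cache(maxsize=1)
def get_ollama_model_singleton():
    return load_ollama_model()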


def load_hugging_face_model():
-    # todo: for this to be error-proof, we have to cache returns as singletons, and serve them
+    # todo: for this to be memory efficient,
+    # we have to cache the returned models as singletons and serve those

    base_model_path = hf_hub_download(
        llm_config.model_file, filename=llm_config.model_name
    )
    # Instantiate model from downloaded file

    # fixme: n_gpu_layers=-1 is a poor approach; it can and will cause crashes.
    # with llama.cpp we have to manually calculate and set this number
    llm = Llama(
        model_path=base_model_path,
        n_gpu_layers=-1,
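A rough sketch of the manual calculation the fixme points at: cap the offloaded layer count by what fits in VRAM instead of passing -1. The per-layer size and the VRAM figure are assumptions here; they would have to come from the model's metadata and the GPU driver.

# hypothetical sizing helper; layer_bytes must be derived from the
# specific GGUF model's metadata, vram_bytes from the GPU driver
def estimate_n_gpu_layers(vram_bytes: int, layer_bytes: int, total_layers: int) -> int:
    usable = int(vram_bytes * 0.9)  # leave headroom for the KV cache and scratch buffers
    return min(total_layers, usable // layer_bytes)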
@@ -43,7 +50,8 @@ def load_hugging_face_model():


def load_model():
-    if USE_HUGGING_FACE:
+    # todo: split up into separate llm and embedder functions
+    if llm_config.supplier == "hugging_face":
        return load_hugging_face_model()
    else:
        return load_ollama_model()
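One possible shape for the split the new todo asks for: separate entry points for the LLM and the embedder that share one dispatch on the supplier string. A sketch only; the _load_pair helper is hypothetical, and it assumes both loaders keep returning an (llm, embeddings) tuple as they do above.

# hypothetical split; names are illustrative, not part of this commit
def load_llm():
    llm, _ = _load_pair()
    return llm


def load_embedder():
    _, embeddings = _load_pair()
    return embeddings


def _load_pair():
    if llm_config.supplier == "hugging_face":
        return load_hugging_face_model()
    return load_ollama_model()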
