Decoupled LLM dependencies (#6)
NeonBohdan authored Aug 14, 2023
2 parents a5f0bcd + fbca43d commit d21d16d
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 223 deletions.
2 changes: 1 addition & 1 deletion docker_overlay/etc/neon/diana.yaml
@@ -14,7 +14,7 @@ MQ:
     mq_handler:
       user: neon_api_utils
       password: Klatchat2021
-FastChat:
+LLM_FASTCHAT:
   model: "fastchat"
   context_depth: 3
   max_tokens: 256
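The renamed section matters because the connector looks up its model settings by service name. A minimal Python sketch of that lookup, mirroring the model_config property removed from rmq.py further down (the helper name and plain-dict config are illustrative assumptions, not part of this commit):

    # Sketch only: resolve the per-LLM section from a loaded diana.yaml config.
    # "fastchat" maps to the "LLM_FASTCHAT" key introduced by this commit.
    def get_llm_config(ovos_config: dict, name: str = "fastchat") -> dict:
        return ovos_config.get(f"LLM_{name.upper()}", {})

    # Example:
    # get_llm_config({"LLM_FASTCHAT": {"model": "fastchat"}})["model"]  # -> "fastchat"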
50 changes: 0 additions & 50 deletions neon_llm_fastchat/config.py

This file was deleted.

36 changes: 11 additions & 25 deletions neon_llm_fastchat/fastchat.py
@@ -23,25 +23,28 @@
 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-import ctranslate2
-import numpy as np
-
 from typing import List
 
+import ctranslate2
 from transformers import T5Tokenizer
 from huggingface_hub import snapshot_download
+import numpy as np
+from neon_llm_core.llm import NeonLLM
 
 
-# TODO: make LLM interface generic
-class FastChat:
+class FastChat(NeonLLM):
+
+    mq_to_llm_role = {
+        "user": "Human",
+        "llm": "Assistant"
+    }
 
     def __init__(self, config):
+        super().__init__(config)
         self.context_depth = config["context_depth"]
         self.max_tokens = config["max_tokens"]
         self.num_parallel_processes = config["num_parallel_processes"]
         self.num_threads_per_process = config["num_threads_per_process"]
-        self._tokenizer = None
-        self._model = None
 
     @property
     def tokenizer(self) -> T5Tokenizer:
@@ -76,12 +79,6 @@ def _system_prompt(self) -> str:
                "geothermal, and biomass. Non-renewable energy sources, on the other hand, " \
                "are finite and will eventually be depleted, such as coal, oil, and natural gas.\n"
 
-    def ask(self, message: str, chat_history: List[List[str]]) -> str:
-        """ Generates llm response based on user message and (user, llm) chat history """
-        prompt = self._assemble_prompt(message, chat_history)
-        llm_text_output = self._call_model(prompt)
-        return llm_text_output
-
     def get_sorted_answer_indexes(self, question: str, answers: List[str]) -> List[int]:
         """
         Creates sorted list of answer indexes with respect to order provided in :param answers based on PPL score
@@ -130,22 +127,11 @@ def _assemble_prompt(self, message: str, chat_history: List[List[str]]) -> str:
         prompt = self._system_prompt
         # Context N messages
         for role, content in chat_history[-self.context_depth:]:
-            role_fastchat = self._convert_role(role)
+            role_fastchat = self.convert_role(role)
             prompt += f"### {role_fastchat}: {content}\n"
         prompt += f"### Human: {message}\n### Assistant:"
         return prompt
 
-    @staticmethod
-    def _convert_role(role: str) -> str:
-        """ Maps MQ role to FastChat internal domain """
-        if role == "user":
-            role_fastchat = "Human"
-        elif role == "llm":
-            role_fastchat = "Assistant"
-        else:
-            raise ValueError(f"role={role} is undefined, supported are: ('user', 'llm')")
-        return role_fastchat
-
     def _call_score(self, prompt: str, targets: List[str]) -> List[List[float]]:
         """
         Calculates logarithmic probabilities for the list of provided text sequences
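Role mapping now leaves this file entirely: the subclass only declares mq_to_llm_role and calls the inherited convert_role. A hypothetical sketch of such a base-class helper, standing in for the deleted _convert_role (this is an assumption about neon_llm_core.llm.NeonLLM, not code from the commit):

    from typing import Dict

    class SketchNeonLLM:
        """Illustrative stand-in for neon_llm_core.llm.NeonLLM (assumed API)."""

        mq_to_llm_role: Dict[str, str] = {}  # subclasses override, e.g. FastChat

        @classmethod
        def convert_role(cls, role: str) -> str:
            # Map an MQ role ("user"/"llm") to the model-specific label
            if role not in cls.mq_to_llm_role:
                raise ValueError(f"role={role} is undefined, "
                                 f"supported are: {tuple(cls.mq_to_llm_role)}")
            return cls.mq_to_llm_role[role]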
149 changes: 5 additions & 144 deletions neon_llm_fastchat/rmq.py
@@ -23,165 +23,26 @@
 # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from neon_mq_connector.connector import MQConnector
-from neon_mq_connector.utils.rabbit_utils import create_mq_callback
-from ovos_utils.log import LOG
+from neon_llm_core.rmq import NeonLLMMQConnector
 
 from neon_llm_fastchat.fastchat import FastChat
-from neon_llm_fastchat.config import load_config
 
 
-# TODO: make LLM MQ interface generic
-class FastchatMQ(MQConnector):
+class FastchatMQ(NeonLLMMQConnector):
     """
     Module for processing MQ requests to Fast Chat LLM
     """
 
-    def __init__(self):
-        self.service_name = f'neon_llm_{self.name}'
-
-        self.ovos_config = load_config()
-        mq_config = self.ovos_config.get("MQ", None)
-        super().__init__(config=mq_config, service_name=self.service_name)
-        self.vhost = "/llm"
-
-        self.register_consumers()
-        self._model = None
-
-    def register_consumers(self):
-        for idx in range(self.model_config["num_parallel_processes"]):
-            self.register_consumer(name=f"neon_llm_{self.service_name}_ask_{idx}",
-                                   vhost=self.vhost,
-                                   queue=self.queue_ask,
-                                   callback=self.handle_request,
-                                   on_error=self.default_error_handler,)
-        self.register_consumer(name=f'neon_llm_{self.name}_score',
-                               vhost=self.vhost,
-                               queue=self.queue_score,
-                               callback=self.handle_score_request,
-                               on_error=self.default_error_handler,)
-        self.register_consumer(name=f'neon_llm_{self.name}_discussion',
-                               vhost=self.vhost,
-                               queue=self.queue_opinion,
-                               callback=self.handle_opinion_request,
-                               on_error=self.default_error_handler,)
-
     @property
     def name(self):
         return "fastchat"
 
-    @property
-    def model_config(self):
-        return self.ovos_config.get(f"LLM_{self.name.upper()}", None)
-
-    @property
-    def queue_ask(self):
-        return f"{self.name}_input"
-
-    @property
-    def queue_score(self):
-        return f"{self.name}_score_input"
-
-    @property
-    def queue_opinion(self):
-        return f"{self.name}_discussion_input"
-
     @property
     def model(self):
         if self._model is None:
             self._model = FastChat(self.model_config)
         return self._model
 
-    @create_mq_callback()
-    def handle_request(self, body: dict):
-        """
-        Handles ask requests from MQ to LLM
-        :param body: request body (dict)
-        """
-        message_id = body["message_id"]
-        routing_key = body["routing_key"]
-
-        query = body["query"]
-        history = body["history"]
-
-        try:
-            response = self.model.ask(message=query, chat_history=history)
-        except ValueError as err:
-            LOG.error(f'ValueError={err}')
-            response = 'Sorry, but I cannot respond to your message at the moment, please try again later'
-        api_response = {
-            "message_id": message_id,
-            "response": response
-        }
-        self.send_message(request_data=api_response,
-                          queue=routing_key)
-        LOG.info(f"Handled ask request for message_id={message_id}")
-
-    @create_mq_callback()
-    def handle_score_request(self, body: dict):
-        """
-        Handles score requests from MQ to LLM
-        :param body: request body (dict)
-        """
-        message_id = body["message_id"]
-        routing_key = body["routing_key"]
-
-        query = body["query"]
-        responses = body["responses"]
-
-        if not responses:
-            sorted_answer_indexes = []
-        else:
-            try:
-                sorted_answer_indexes = self.model.get_sorted_answer_indexes(question=query, answers=responses)
-            except ValueError as err:
-                LOG.error(f'ValueError={err}')
-                sorted_answer_indexes = []
-        api_response = {
-            "message_id": message_id,
-            "sorted_answer_indexes": sorted_answer_indexes
-        }
-        self.send_message(request_data=api_response,
-                          queue=routing_key)
-        LOG.info(f"Handled score request for message_id={message_id}")
-
-    @create_mq_callback()
-    def handle_opinion_request(self, body: dict):
-        """
-        Handles opinion requests from MQ to LLM
-        :param body: request body (dict)
-        """
-        message_id = body["message_id"]
-        routing_key = body["routing_key"]
-
-        query = body["query"]
-        options = body["options"]
-        responses = list(options.values())
-
-        if not responses:
-            opinion = "Sorry, but I got no options to choose from."
-        else:
-            try:
-                sorted_answer_indexes = self.model.get_sorted_answer_indexes(question=query, answers=responses)
-                best_respondent_nick, best_responce = list(options.items())[sorted_answer_indexes[0]]
-                opinion = self._ask_model_for_opinion(respondent_nick=best_respondent_nick,
-                                                      question=query,
-                                                      answer=best_responce)
-            except ValueError as err:
-                LOG.error(f'ValueError={err}')
-                opinion = "Sorry, but I experienced an issue trying to make up an opinion on this topic"
-
-        api_response = {
-            "message_id": message_id,
-            "opinion": opinion
-        }
-
-        self.send_message(request_data=api_response,
-                          queue=routing_key)
-        LOG.info(f"Handled ask request for message_id={message_id}")
-
-    def _ask_model_for_opinion(self, respondent_nick: str, question: str, answer: str) -> str:
-        prompt = f'Why Answer "{answer}" to the Question "{question}" generated by Bot named "{respondent_nick}" is good?'
-        opinion = self.model.ask(message=prompt, chat_history=[])
-        LOG.info(f'Received LLM opinion={opinion}, prompt={prompt}')
-        return opinion
+    @staticmethod
+    def compose_opinion_prompt(respondent_nick: str, question: str, answer: str) -> str:
+        return f'Why Answer "{answer}" to the Question "{question}" generated by Bot named "{respondent_nick}" is good?'
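After the rewrite the subclass keeps only model-specific pieces: its name, a lazily built model, and the opinion prompt text; queue registration and the ask/score/discussion handlers move into neon_llm_core. The sketch below spells out the base-class contract this implies; it is an assumption about NeonLLMMQConnector's interface, not code taken from the library:

    from abc import ABC, abstractmethod

    class SketchNeonLLMMQConnector(ABC):
        """Illustrative stand-in for neon_llm_core.rmq.NeonLLMMQConnector."""

        @property
        @abstractmethod
        def name(self) -> str:
            """Service name, e.g. "fastchat"; used to derive queue names."""

        @property
        @abstractmethod
        def model(self):
            """Lazily constructed LLM wrapper, e.g. FastChat."""

        @staticmethod
        @abstractmethod
        def compose_opinion_prompt(respondent_nick: str, question: str,
                                   answer: str) -> str:
            """Prompt text used by the inherited opinion handler."""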
4 changes: 1 addition & 3 deletions requirements/requirements.txt
@@ -3,6 +3,4 @@ ctranslate2
 transformers
 SentencePiece
 # networking
-neon-mq-connector~=0.7
-ovos-utils~=0.0.32
-ovos-config~=0.0.10
+neon_llm_core
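With the individual networking packages replaced by neon_llm_core, launching the service reduces to instantiating the connector. A hedged usage sketch, assuming NeonLLMMQConnector keeps the blocking run() entry point of neon_mq_connector's MQConnector (check neon_llm_core for the actual API):

    from neon_llm_fastchat.rmq import FastchatMQ

    def main():
        # Reads the MQ and LLM_FASTCHAT sections from the Diana configuration
        connector = FastchatMQ()
        connector.run()  # assumed blocking call that starts the MQ consumers

    if __name__ == "__main__":
        main()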
